From 7c69ca05026be5faa0c4868d6bdcfa70d03aee5c Mon Sep 17 00:00:00 2001 From: Yingwen Date: Tue, 10 Dec 2024 21:10:37 +0800 Subject: [PATCH 01/59] chore: bump main branch version to 0.12 (#5133) chore: bump version to v0.12.0 --- .github/workflows/release.yml | 2 +- Cargo.lock | 146 +++++++++++++++++----------------- Cargo.toml | 2 +- 3 files changed, 75 insertions(+), 75 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 4f32298a8ba2..3f46ef1a7bda 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -91,7 +91,7 @@ env: # The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313; NIGHTLY_RELEASE_PREFIX: nightly # Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release. - NEXT_RELEASE_VERSION: v0.11.0 + NEXT_RELEASE_VERSION: v0.12.0 # Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs permissions: diff --git a/Cargo.lock b/Cargo.lock index 920393daa030..177625a65955 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -188,7 +188,7 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c" [[package]] name = "api" -version = "0.11.0" +version = "0.12.0" dependencies = [ "common-base", "common-decimal", @@ -749,7 +749,7 @@ dependencies = [ [[package]] name = "auth" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "async-trait", @@ -1340,7 +1340,7 @@ dependencies = [ [[package]] name = "cache" -version = "0.11.0" +version = "0.12.0" dependencies = [ "catalog", "common-error", @@ -1348,7 +1348,7 @@ dependencies = [ "common-meta", "moka", "snafu 0.8.5", - "substrait 0.11.0", + "substrait 0.12.0", ] [[package]] @@ -1375,7 +1375,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "catalog" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "arrow", @@ -1714,7 +1714,7 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "cli" -version = "0.11.0" +version = "0.12.0" dependencies = [ "async-trait", "auth", @@ -1758,7 +1758,7 @@ dependencies = [ "session", "snafu 0.8.5", "store-api", - "substrait 0.11.0", + "substrait 0.12.0", "table", "temp-env", "tempfile", @@ -1768,7 +1768,7 @@ dependencies = [ [[package]] name = "client" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "arc-swap", @@ -1797,7 +1797,7 @@ dependencies = [ "rand", "serde_json", "snafu 0.8.5", - "substrait 0.11.0", + "substrait 0.12.0", "substrait 0.37.3", "tokio", "tokio-stream", @@ -1838,7 +1838,7 @@ dependencies = [ [[package]] name = "cmd" -version = "0.11.0" +version = "0.12.0" dependencies = [ "async-trait", "auth", @@ -1898,7 +1898,7 @@ dependencies = [ "similar-asserts", "snafu 0.8.5", "store-api", - "substrait 0.11.0", + "substrait 0.12.0", "table", "temp-env", "tempfile", @@ -1944,7 +1944,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335" [[package]] name = "common-base" -version = "0.11.0" +version = "0.12.0" dependencies = [ "anymap2", "async-trait", @@ -1965,7 +1965,7 @@ dependencies = [ [[package]] name = "common-catalog" -version = "0.11.0" +version = "0.12.0" dependencies = [ "chrono", "common-error", @@ -1976,7 +1976,7 @@ dependencies = [ [[package]] name = "common-config" -version = "0.11.0" +version = "0.12.0" dependencies = [ "common-base", "common-error", @@ -1999,7 +1999,7 @@ dependencies = [ [[package]] name = "common-datasource" -version = 
"0.11.0" +version = "0.12.0" dependencies = [ "arrow", "arrow-schema", @@ -2036,7 +2036,7 @@ dependencies = [ [[package]] name = "common-decimal" -version = "0.11.0" +version = "0.12.0" dependencies = [ "bigdecimal 0.4.5", "common-error", @@ -2049,7 +2049,7 @@ dependencies = [ [[package]] name = "common-error" -version = "0.11.0" +version = "0.12.0" dependencies = [ "snafu 0.8.5", "strum 0.25.0", @@ -2058,7 +2058,7 @@ dependencies = [ [[package]] name = "common-frontend" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "async-trait", @@ -2073,7 +2073,7 @@ dependencies = [ [[package]] name = "common-function" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "approx 0.5.1", @@ -2118,7 +2118,7 @@ dependencies = [ [[package]] name = "common-greptimedb-telemetry" -version = "0.11.0" +version = "0.12.0" dependencies = [ "async-trait", "common-runtime", @@ -2135,7 +2135,7 @@ dependencies = [ [[package]] name = "common-grpc" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "arrow-flight", @@ -2161,7 +2161,7 @@ dependencies = [ [[package]] name = "common-grpc-expr" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "common-base", @@ -2180,7 +2180,7 @@ dependencies = [ [[package]] name = "common-macro" -version = "0.11.0" +version = "0.12.0" dependencies = [ "arc-swap", "common-query", @@ -2194,7 +2194,7 @@ dependencies = [ [[package]] name = "common-mem-prof" -version = "0.11.0" +version = "0.12.0" dependencies = [ "common-error", "common-macro", @@ -2207,7 +2207,7 @@ dependencies = [ [[package]] name = "common-meta" -version = "0.11.0" +version = "0.12.0" dependencies = [ "anymap2", "api", @@ -2264,7 +2264,7 @@ dependencies = [ [[package]] name = "common-options" -version = "0.11.0" +version = "0.12.0" dependencies = [ "common-grpc", "humantime-serde", @@ -2273,11 +2273,11 @@ dependencies = [ [[package]] name = "common-plugins" -version = "0.11.0" +version = "0.12.0" [[package]] name = "common-pprof" -version = "0.11.0" +version = "0.12.0" dependencies = [ "common-error", "common-macro", @@ -2289,7 +2289,7 @@ dependencies = [ [[package]] name = "common-procedure" -version = "0.11.0" +version = "0.12.0" dependencies = [ "async-stream", "async-trait", @@ -2316,7 +2316,7 @@ dependencies = [ [[package]] name = "common-procedure-test" -version = "0.11.0" +version = "0.12.0" dependencies = [ "async-trait", "common-procedure", @@ -2324,7 +2324,7 @@ dependencies = [ [[package]] name = "common-query" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "async-trait", @@ -2350,7 +2350,7 @@ dependencies = [ [[package]] name = "common-recordbatch" -version = "0.11.0" +version = "0.12.0" dependencies = [ "arc-swap", "common-error", @@ -2369,7 +2369,7 @@ dependencies = [ [[package]] name = "common-runtime" -version = "0.11.0" +version = "0.12.0" dependencies = [ "async-trait", "clap 4.5.19", @@ -2399,7 +2399,7 @@ dependencies = [ [[package]] name = "common-telemetry" -version = "0.11.0" +version = "0.12.0" dependencies = [ "atty", "backtrace", @@ -2427,7 +2427,7 @@ dependencies = [ [[package]] name = "common-test-util" -version = "0.11.0" +version = "0.12.0" dependencies = [ "client", "common-query", @@ -2439,7 +2439,7 @@ dependencies = [ [[package]] name = "common-time" -version = "0.11.0" +version = "0.12.0" dependencies = [ "arrow", "chrono", @@ -2457,7 +2457,7 @@ dependencies = [ [[package]] name = "common-version" -version = "0.11.0" +version = "0.12.0" dependencies = [ "build-data", "const_format", @@ -2467,7 +2467,7 @@ dependencies = [ 
[[package]] name = "common-wal" -version = "0.11.0" +version = "0.12.0" dependencies = [ "common-base", "common-error", @@ -3276,7 +3276,7 @@ dependencies = [ [[package]] name = "datanode" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "arrow-flight", @@ -3327,7 +3327,7 @@ dependencies = [ "session", "snafu 0.8.5", "store-api", - "substrait 0.11.0", + "substrait 0.12.0", "table", "tokio", "toml 0.8.19", @@ -3336,7 +3336,7 @@ dependencies = [ [[package]] name = "datatypes" -version = "0.11.0" +version = "0.12.0" dependencies = [ "arrow", "arrow-array", @@ -3954,7 +3954,7 @@ dependencies = [ [[package]] name = "file-engine" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "async-trait", @@ -4071,7 +4071,7 @@ checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8" [[package]] name = "flow" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "arrow", @@ -4128,7 +4128,7 @@ dependencies = [ "snafu 0.8.5", "store-api", "strum 0.25.0", - "substrait 0.11.0", + "substrait 0.12.0", "table", "tokio", "tonic 0.11.0", @@ -4175,7 +4175,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" [[package]] name = "frontend" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "arc-swap", @@ -5315,7 +5315,7 @@ dependencies = [ [[package]] name = "index" -version = "0.11.0" +version = "0.12.0" dependencies = [ "async-trait", "asynchronous-codec", @@ -6150,7 +6150,7 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "log-query" -version = "0.11.0" +version = "0.12.0" dependencies = [ "chrono", "common-error", @@ -6161,7 +6161,7 @@ dependencies = [ [[package]] name = "log-store" -version = "0.11.0" +version = "0.12.0" dependencies = [ "async-stream", "async-trait", @@ -6482,7 +6482,7 @@ dependencies = [ [[package]] name = "meta-client" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "async-trait", @@ -6509,7 +6509,7 @@ dependencies = [ [[package]] name = "meta-srv" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "async-trait", @@ -6588,7 +6588,7 @@ dependencies = [ [[package]] name = "metric-engine" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "aquamarine", @@ -6692,7 +6692,7 @@ dependencies = [ [[package]] name = "mito2" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "aquamarine", @@ -7404,7 +7404,7 @@ dependencies = [ [[package]] name = "object-store" -version = "0.11.0" +version = "0.12.0" dependencies = [ "anyhow", "bytes", @@ -7657,7 +7657,7 @@ dependencies = [ [[package]] name = "operator" -version = "0.11.0" +version = "0.12.0" dependencies = [ "ahash 0.8.11", "api", @@ -7705,7 +7705,7 @@ dependencies = [ "sql", "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", "store-api", - "substrait 0.11.0", + "substrait 0.12.0", "table", "tokio", "tokio-util", @@ -7955,7 +7955,7 @@ dependencies = [ [[package]] name = "partition" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "async-trait", @@ -8241,7 +8241,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pipeline" -version = "0.11.0" +version = "0.12.0" dependencies = [ "ahash 0.8.11", "api", @@ -8404,7 +8404,7 @@ dependencies = [ [[package]] name = "plugins" -version = "0.11.0" +version = "0.12.0" dependencies = [ "auth", "clap 4.5.19", @@ -8681,7 +8681,7 @@ dependencies = [ [[package]] name = "promql" -version 
= "0.11.0" +version = "0.12.0" dependencies = [ "ahash 0.8.11", "async-trait", @@ -8919,7 +8919,7 @@ dependencies = [ [[package]] name = "puffin" -version = "0.11.0" +version = "0.12.0" dependencies = [ "async-compression 0.4.13", "async-trait", @@ -9043,7 +9043,7 @@ dependencies = [ [[package]] name = "query" -version = "0.11.0" +version = "0.12.0" dependencies = [ "ahash 0.8.11", "api", @@ -9110,7 +9110,7 @@ dependencies = [ "stats-cli", "store-api", "streaming-stats", - "substrait 0.11.0", + "substrait 0.12.0", "table", "tokio", "tokio-stream", @@ -10572,7 +10572,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "script" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "arc-swap", @@ -10866,7 +10866,7 @@ dependencies = [ [[package]] name = "servers" -version = "0.11.0" +version = "0.12.0" dependencies = [ "ahash 0.8.11", "api", @@ -10979,7 +10979,7 @@ dependencies = [ [[package]] name = "session" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "arc-swap", @@ -11327,7 +11327,7 @@ dependencies = [ [[package]] name = "sql" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "chrono", @@ -11390,7 +11390,7 @@ dependencies = [ [[package]] name = "sqlness-runner" -version = "0.11.0" +version = "0.12.0" dependencies = [ "async-trait", "clap 4.5.19", @@ -11610,7 +11610,7 @@ dependencies = [ [[package]] name = "store-api" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "aquamarine", @@ -11781,7 +11781,7 @@ dependencies = [ [[package]] name = "substrait" -version = "0.11.0" +version = "0.12.0" dependencies = [ "async-trait", "bytes", @@ -11980,7 +11980,7 @@ dependencies = [ [[package]] name = "table" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "async-trait", @@ -12246,7 +12246,7 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] name = "tests-fuzz" -version = "0.11.0" +version = "0.12.0" dependencies = [ "arbitrary", "async-trait", @@ -12288,7 +12288,7 @@ dependencies = [ [[package]] name = "tests-integration" -version = "0.11.0" +version = "0.12.0" dependencies = [ "api", "arrow-flight", @@ -12352,7 +12352,7 @@ dependencies = [ "sql", "sqlx", "store-api", - "substrait 0.11.0", + "substrait 0.12.0", "table", "tempfile", "time", diff --git a/Cargo.toml b/Cargo.toml index 4cc07cd89818..d1d360850e70 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,7 +68,7 @@ members = [ resolver = "2" [workspace.package] -version = "0.11.0" +version = "0.12.0" edition = "2021" license = "Apache-2.0" From 3d1b8c4fac9bf2252213a20eceb8c95104f22dd2 Mon Sep 17 00:00:00 2001 From: shuiyisong <113876041+shuiyisong@users.noreply.github.com> Date: Wed, 11 Dec 2024 10:56:48 +0800 Subject: [PATCH 02/59] chore: add `/ready` api for health checking (#5124) * chore: add ready endpoint for health checking * chore: add test --- src/servers/src/http.rs | 13 +++++++++---- tests-integration/tests/http.rs | 33 +++++++++++++++++++-------------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/src/servers/src/http.rs b/src/servers/src/http.rs index c719e02cac35..d8d07ed31fa0 100644 --- a/src/servers/src/http.rs +++ b/src/servers/src/http.rs @@ -638,10 +638,15 @@ impl HttpServer { router.clone() }; - router = router.route( - "/health", - routing::get(handler::health).post(handler::health), - ); + router = router + .route( + "/health", + routing::get(handler::health).post(handler::health), + ) + .route( + "/ready", + 
routing::get(handler::health).post(handler::health), + ); router = router.route("/status", routing::get(handler::status)); diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index 083a9daa1a26..9d7b81f3919b 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -757,21 +757,26 @@ pub async fn test_health_api(store_type: StorageType) { let (app, _guard) = setup_test_http_app_with_frontend(store_type, "health_api").await; let client = TestClient::new(app); - // we can call health api with both `GET` and `POST` method. - let res_post = client.post("/health").send().await; - assert_eq!(res_post.status(), StatusCode::OK); - let res_get = client.get("/health").send().await; - assert_eq!(res_get.status(), StatusCode::OK); - - // both `GET` and `POST` method return same result - let body_text = res_post.text().await; - assert_eq!(body_text, res_get.text().await); - - // currently health api simply returns an empty json `{}`, which can be deserialized to an empty `HealthResponse` - assert_eq!(body_text, "{}"); + async fn health_api(client: &TestClient, endpoint: &str) { + // we can call health api with both `GET` and `POST` method. + let res_post = client.post(endpoint).send().await; + assert_eq!(res_post.status(), StatusCode::OK); + let res_get = client.get(endpoint).send().await; + assert_eq!(res_get.status(), StatusCode::OK); + + // both `GET` and `POST` method return same result + let body_text = res_post.text().await; + assert_eq!(body_text, res_get.text().await); + + // currently health api simply returns an empty json `{}`, which can be deserialized to an empty `HealthResponse` + assert_eq!(body_text, "{}"); + + let body = serde_json::from_str::(&body_text).unwrap(); + assert_eq!(body, HealthResponse {}); + } - let body = serde_json::from_str::(&body_text).unwrap(); - assert_eq!(body, HealthResponse {}); + health_api(&client, "/health").await; + health_api(&client, "/ready").await; } pub async fn test_status_api(store_type: StorageType) { From d91517688ab4ad010f71cae01a50de883c7967bb Mon Sep 17 00:00:00 2001 From: discord9 <55937128+discord9@users.noreply.github.com> Date: Wed, 11 Dec 2024 15:02:03 +0800 Subject: [PATCH 03/59] chore: fix aws_lc not in depend tree check in CI (#5121) * chore: fix aws_lc check in CI * chore: update lock file --- .github/cargo-blacklist.txt | 1 + .github/workflows/develop.yml | 7 ---- Cargo.lock | 61 ++--------------------------------- src/servers/Cargo.toml | 5 --- 4 files changed, 3 insertions(+), 71 deletions(-) diff --git a/.github/cargo-blacklist.txt b/.github/cargo-blacklist.txt index 32e7878a86db..d2f071130ee9 100644 --- a/.github/cargo-blacklist.txt +++ b/.github/cargo-blacklist.txt @@ -1,2 +1,3 @@ native-tls openssl +aws-lc-sys diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index 80a4f042c0f7..6eccbe65b811 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -269,13 +269,6 @@ jobs: - name: Install cargo-gc-bin shell: bash run: cargo install cargo-gc-bin - - name: Check aws-lc-sys will not build - shell: bash - run: | - if cargo tree -i aws-lc-sys -e features | grep -q aws-lc-sys; then - echo "Found aws-lc-sys, which has compilation problems on older gcc versions. Please replace it with ring until its building experience improves." 
- exit 1 - fi - name: Build greptime bianry shell: bash # `cargo gc` will invoke `cargo build` with specified args diff --git a/Cargo.lock b/Cargo.lock index 177625a65955..628c6a582418 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -798,21 +798,6 @@ dependencies = [ "cc", ] -[[package]] -name = "aws-lc-sys" -version = "0.21.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3ddc4a5b231dd6958b140ff3151b6412b3f4321fab354f399eec8f14b06df62" -dependencies = [ - "bindgen 0.69.4", - "cc", - "cmake", - "dunce", - "fs_extra", - "libc", - "paste", -] - [[package]] name = "axum" version = "0.6.20" @@ -993,29 +978,6 @@ dependencies = [ "serde", ] -[[package]] -name = "bindgen" -version = "0.69.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a00dc851838a2120612785d195287475a3ac45514741da670b735818822129a0" -dependencies = [ - "bitflags 2.6.0", - "cexpr", - "clang-sys", - "itertools 0.12.1", - "lazy_static", - "lazycell", - "log", - "prettyplease", - "proc-macro2", - "quote", - "regex", - "rustc-hash 1.1.0", - "shlex", - "syn 2.0.90", - "which", -] - [[package]] name = "bindgen" version = "0.70.1" @@ -3719,12 +3681,6 @@ version = "1.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2" -[[package]] -name = "dunce" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92773504d58c093f6de2459af4af33faa518c13451eb8f2b5698ed3d36e7c813" - [[package]] name = "duration-str" version = "0.11.2" @@ -4314,12 +4270,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "fs_extra" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" - [[package]] name = "fsevent-sys" version = "4.1.0" @@ -5955,12 +5905,6 @@ dependencies = [ "spin 0.9.8", ] -[[package]] -name = "lazycell" -version = "1.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" - [[package]] name = "levenshtein_automata" version = "0.2.1" @@ -6966,7 +6910,7 @@ checksum = "06f19e4cfa0ab5a76b627cec2d81331c49b034988eaf302c3bafeada684eadef" dependencies = [ "base64 0.21.7", "bigdecimal 0.4.5", - "bindgen 0.70.1", + "bindgen", "bitflags 2.6.0", "bitvec", "btoi", @@ -7005,7 +6949,7 @@ checksum = "478b0ff3f7d67b79da2b96f56f334431aef65e15ba4b29dd74a4236e29582bdc" dependencies = [ "base64 0.21.7", "bigdecimal 0.4.5", - "bindgen 0.70.1", + "bindgen", "bitflags 2.6.0", "bitvec", "btoi", @@ -10876,7 +10820,6 @@ dependencies = [ "arrow-schema", "async-trait", "auth", - "aws-lc-sys", "axum", "axum-macros", "base64 0.21.7", diff --git a/src/servers/Cargo.toml b/src/servers/Cargo.toml index c01560724931..ddfeaf27bd45 100644 --- a/src/servers/Cargo.toml +++ b/src/servers/Cargo.toml @@ -139,11 +139,6 @@ tokio-test = "0.4" [target.'cfg(unix)'.dev-dependencies] pprof = { version = "0.13", features = ["criterion", "flamegraph"] } -[target.'cfg(windows)'.dependencies] -aws-lc-sys = { version = "0.21.0", features = [ - "prebuilt-nasm", -] } # use prebuilt nasm on windows per https://github.com/aws/aws-lc-rs/blob/main/aws-lc-sys/README.md#use-of-prebuilt-nasm-objects - [build-dependencies] common-version.workspace = true From a6893aad421fcc242a67b859a7aa21624bbe8a17 Mon Sep 17 00:00:00 2001 From: jeremyhi Date: Wed, 11 Dec 2024 16:04:02 +0800 Subject: [PATCH 04/59] 
chore: set store_key_prefix for all kvbackend (#5132) --- src/meta-srv/src/bootstrap.rs | 44 +++++++++++++++++------------------ src/meta-srv/src/metasrv.rs | 4 ++++ 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/src/meta-srv/src/bootstrap.rs b/src/meta-srv/src/bootstrap.rs index 47afa0ab416b..85770e1f3d4d 100644 --- a/src/meta-srv/src/bootstrap.rs +++ b/src/meta-srv/src/bootstrap.rs @@ -206,43 +206,41 @@ pub async fn metasrv_builder( plugins: Plugins, kv_backend: Option, ) -> Result { - let (kv_backend, election) = match (kv_backend, &opts.backend) { + let (mut kv_backend, election) = match (kv_backend, &opts.backend) { (Some(kv_backend), _) => (kv_backend, None), (None, BackendImpl::MemoryStore) => (Arc::new(MemoryKvBackend::new()) as _, None), (None, BackendImpl::EtcdStore) => { let etcd_client = create_etcd_client(opts).await?; - let kv_backend = { - let etcd_backend = - EtcdStore::with_etcd_client(etcd_client.clone(), opts.max_txn_ops); - if !opts.store_key_prefix.is_empty() { - Arc::new(ChrootKvBackend::new( - opts.store_key_prefix.clone().into_bytes(), - etcd_backend, - )) - } else { - etcd_backend - } - }; - ( - kv_backend, - Some( - EtcdElection::with_etcd_client( - &opts.server_addr, - etcd_client.clone(), - opts.store_key_prefix.clone(), - ) - .await?, - ), + let kv_backend = EtcdStore::with_etcd_client(etcd_client.clone(), opts.max_txn_ops); + let election = EtcdElection::with_etcd_client( + &opts.server_addr, + etcd_client, + opts.store_key_prefix.clone(), ) + .await?; + + (kv_backend, Some(election)) } #[cfg(feature = "pg_kvbackend")] (None, BackendImpl::PostgresStore) => { let pg_client = create_postgres_client(opts).await?; let kv_backend = PgStore::with_pg_client(pg_client).await.unwrap(); + // TODO(jeremy, weny): implement election for postgres (kv_backend, None) } }; + if !opts.store_key_prefix.is_empty() { + info!( + "using chroot kv backend with prefix: {prefix}", + prefix = opts.store_key_prefix + ); + kv_backend = Arc::new(ChrootKvBackend::new( + opts.store_key_prefix.clone().into_bytes(), + kv_backend, + )) + } + let in_memory = Arc::new(MemoryKvBackend::new()) as ResettableKvBackendRef; let selector = match opts.selector { diff --git a/src/meta-srv/src/metasrv.rs b/src/meta-srv/src/metasrv.rs index 716b85f83485..da614ac9b943 100644 --- a/src/meta-srv/src/metasrv.rs +++ b/src/meta-srv/src/metasrv.rs @@ -470,6 +470,10 @@ impl Metasrv { }); } } else { + warn!( + "Ensure only one instance of Metasrv is running, as there is no election service." 
+ ); + if let Err(e) = self.wal_options_allocator.start().await { error!(e; "Failed to start wal options allocator"); } From 2c4ac76754265762c884958cc0057170ed97081a Mon Sep 17 00:00:00 2001 From: dennis zhuang Date: Wed, 11 Dec 2024 16:08:05 +0800 Subject: [PATCH 05/59] feat: adjust WAL purge default configurations (#5107) * feat: adjust WAL purge default configurations * fix: config * feat: change raft engine file_size default to 128Mib --- config/config.md | 22 +++++++++++----------- config/datanode.example.toml | 11 +++++------ config/metasrv.example.toml | 16 ++++++++-------- config/standalone.example.toml | 12 ++++++------ src/common/wal/src/config/raft_engine.rs | 6 +++--- tests-integration/tests/http.rs | 6 +++--- 6 files changed, 36 insertions(+), 37 deletions(-) diff --git a/config/config.md b/config/config.md index 0f70a8cb126e..6a500a5b4a34 100644 --- a/config/config.md +++ b/config/config.md @@ -13,11 +13,11 @@ | Key | Type | Default | Descriptions | | --- | -----| ------- | ----------- | | `mode` | String | `standalone` | The running mode of the datanode. It can be `standalone` or `distributed`. | -| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. | | `default_timezone` | String | Unset | The default timezone of the server. | | `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.
By default, it provides services after all regions have been initialized. | | `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. | | `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. | +| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. | | `runtime` | -- | -- | The runtime options. | | `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. | | `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. | @@ -61,9 +61,9 @@ | `wal` | -- | -- | The WAL options. | | `wal.provider` | String | `raft_engine` | The provider of the WAL.
- `raft_engine`: the wal is stored in the local file system by raft-engine.
- `kafka`: it's remote wal that data is stored in Kafka. | | `wal.dir` | String | Unset | The directory to store the WAL files.
**It's only used when the provider is `raft_engine`**. | -| `wal.file_size` | String | `256MB` | The size of the WAL segment file.
**It's only used when the provider is `raft_engine`**. | -| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.
**It's only used when the provider is `raft_engine`**. | -| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.
**It's only used when the provider is `raft_engine`**. | +| `wal.file_size` | String | `128MB` | The size of the WAL segment file.
**It's only used when the provider is `raft_engine`**. | +| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a flush.
**It's only used when the provider is `raft_engine`**. | +| `wal.purge_interval` | String | `1m` | The interval to trigger a flush.
**It's only used when the provider is `raft_engine`**. | | `wal.read_batch_size` | Integer | `128` | The read batch size.
**It's only used when the provider is `raft_engine`**. | | `wal.sync_write` | Bool | `false` | Whether to use sync write.
**It's only used when the provider is `raft_engine`**. | | `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.
**It's only used when the provider is `raft_engine`**. | @@ -286,12 +286,12 @@ | `bind_addr` | String | `127.0.0.1:3002` | The bind address of metasrv. | | `server_addr` | String | `127.0.0.1:3002` | The communication server address for frontend and datanode to connect to metasrv, "127.0.0.1:3002" by default for localhost. | | `store_addrs` | Array | -- | Store server address default to etcd store. | +| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. | +| `backend` | String | `EtcdStore` | The datastore for meta server. | | `selector` | String | `round_robin` | Datanode selector type.
- `round_robin` (default value)
- `lease_based`
- `load_based`
For details, please see "https://docs.greptime.com/developer-guide/metasrv/selector". | | `use_memory_store` | Bool | `false` | Store data in memory. | -| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. | -| `store_key_prefix` | String | `""` | If it's not empty, the metasrv will store all data with this key prefix. | | `enable_region_failover` | Bool | `false` | Whether to enable region failover.
This feature is only available on GreptimeDB running on cluster mode and
- Using Remote WAL
- Using shared storage (e.g., s3). | -| `backend` | String | `EtcdStore` | The datastore for meta server. | +| `enable_telemetry` | Bool | `true` | Whether to enable greptimedb telemetry. Enabled by default. | | `runtime` | -- | -- | The runtime options. | | `runtime.global_rt_size` | Integer | `8` | The number of threads to execute the runtime for global read operations. | | `runtime.compact_rt_size` | Integer | `4` | The number of threads to execute the runtime for global write operations. | @@ -356,7 +356,6 @@ | `node_id` | Integer | Unset | The datanode identifier and should be unique in the cluster. | | `require_lease_before_startup` | Bool | `false` | Start services after regions have obtained leases.
It will block the datanode start if it can't receive leases in the heartbeat from metasrv. | | `init_regions_in_background` | Bool | `false` | Initialize all regions in the background during the startup.
By default, it provides services after all regions have been initialized. | -| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. | | `init_regions_parallelism` | Integer | `16` | Parallelism of initializing regions. | | `max_concurrent_queries` | Integer | `0` | The maximum current queries allowed to be executed. Zero means unlimited. | | `rpc_addr` | String | Unset | Deprecated, use `grpc.addr` instead. | @@ -364,6 +363,7 @@ | `rpc_runtime_size` | Integer | Unset | Deprecated, use `grpc.runtime_size` instead. | | `rpc_max_recv_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_recv_message_size` instead. | | `rpc_max_send_message_size` | String | Unset | Deprecated, use `grpc.rpc_max_send_message_size` instead. | +| `enable_telemetry` | Bool | `true` | Enable telemetry to collect anonymous usage data. Enabled by default. | | `http` | -- | -- | The HTTP server options. | | `http.addr` | String | `127.0.0.1:4000` | The address to bind the HTTP server. | | `http.timeout` | String | `30s` | HTTP request timeout. Set to 0 to disable timeout. | @@ -398,9 +398,9 @@ | `wal` | -- | -- | The WAL options. | | `wal.provider` | String | `raft_engine` | The provider of the WAL.
- `raft_engine`: the wal is stored in the local file system by raft-engine.
- `kafka`: it's remote wal that data is stored in Kafka. | | `wal.dir` | String | Unset | The directory to store the WAL files.
**It's only used when the provider is `raft_engine`**. | -| `wal.file_size` | String | `256MB` | The size of the WAL segment file.
**It's only used when the provider is `raft_engine`**. | -| `wal.purge_threshold` | String | `4GB` | The threshold of the WAL size to trigger a flush.
**It's only used when the provider is `raft_engine`**. | -| `wal.purge_interval` | String | `10m` | The interval to trigger a flush.
**It's only used when the provider is `raft_engine`**. | +| `wal.file_size` | String | `128MB` | The size of the WAL segment file.
**It's only used when the provider is `raft_engine`**. | +| `wal.purge_threshold` | String | `1GB` | The threshold of the WAL size to trigger a flush.
**It's only used when the provider is `raft_engine`**. | +| `wal.purge_interval` | String | `1m` | The interval to trigger a flush.
**It's only used when the provider is `raft_engine`**. | | `wal.read_batch_size` | Integer | `128` | The read batch size.
**It's only used when the provider is `raft_engine`**. | | `wal.sync_write` | Bool | `false` | Whether to use sync write.
**It's only used when the provider is `raft_engine`**. | | `wal.enable_log_recycle` | Bool | `true` | Whether to reuse logically truncated log files.
**It's only used when the provider is `raft_engine`**. | diff --git a/config/datanode.example.toml b/config/datanode.example.toml index 8bfa8732cc41..0ba80a9f7d92 100644 --- a/config/datanode.example.toml +++ b/config/datanode.example.toml @@ -13,9 +13,6 @@ require_lease_before_startup = false ## By default, it provides services after all regions have been initialized. init_regions_in_background = false -## Enable telemetry to collect anonymous usage data. -enable_telemetry = true - ## Parallelism of initializing regions. init_regions_parallelism = 16 @@ -42,6 +39,8 @@ rpc_max_recv_message_size = "512MB" ## @toml2docs:none-default rpc_max_send_message_size = "512MB" +## Enable telemetry to collect anonymous usage data. Enabled by default. +#+ enable_telemetry = true ## The HTTP server options. [http] @@ -143,15 +142,15 @@ dir = "/tmp/greptimedb/wal" ## The size of the WAL segment file. ## **It's only used when the provider is `raft_engine`**. -file_size = "256MB" +file_size = "128MB" ## The threshold of the WAL size to trigger a flush. ## **It's only used when the provider is `raft_engine`**. -purge_threshold = "4GB" +purge_threshold = "1GB" ## The interval to trigger a flush. ## **It's only used when the provider is `raft_engine`**. -purge_interval = "10m" +purge_interval = "1m" ## The read batch size. ## **It's only used when the provider is `raft_engine`**. diff --git a/config/metasrv.example.toml b/config/metasrv.example.toml index bcd7ee41412b..27716b5aa37b 100644 --- a/config/metasrv.example.toml +++ b/config/metasrv.example.toml @@ -10,6 +10,12 @@ server_addr = "127.0.0.1:3002" ## Store server address default to etcd store. store_addrs = ["127.0.0.1:2379"] +## If it's not empty, the metasrv will store all data with this key prefix. +store_key_prefix = "" + +## The datastore for meta server. +backend = "EtcdStore" + ## Datanode selector type. ## - `round_robin` (default value) ## - `lease_based` @@ -20,20 +26,14 @@ selector = "round_robin" ## Store data in memory. use_memory_store = false -## Whether to enable greptimedb telemetry. -enable_telemetry = true - -## If it's not empty, the metasrv will store all data with this key prefix. -store_key_prefix = "" - ## Whether to enable region failover. ## This feature is only available on GreptimeDB running on cluster mode and ## - Using Remote WAL ## - Using shared storage (e.g., s3). enable_region_failover = false -## The datastore for meta server. -backend = "EtcdStore" +## Whether to enable greptimedb telemetry. Enabled by default. +#+ enable_telemetry = true ## The runtime options. #+ [runtime] diff --git a/config/standalone.example.toml b/config/standalone.example.toml index 56cbeaddb9d5..8eae532d6166 100644 --- a/config/standalone.example.toml +++ b/config/standalone.example.toml @@ -1,9 +1,6 @@ ## The running mode of the datanode. It can be `standalone` or `distributed`. mode = "standalone" -## Enable telemetry to collect anonymous usage data. -enable_telemetry = true - ## The default timezone of the server. ## @toml2docs:none-default default_timezone = "UTC" @@ -18,6 +15,9 @@ init_regions_parallelism = 16 ## The maximum current queries allowed to be executed. Zero means unlimited. max_concurrent_queries = 0 +## Enable telemetry to collect anonymous usage data. Enabled by default. +#+ enable_telemetry = true + ## The runtime options. #+ [runtime] ## The number of threads to execute the runtime for global read operations. @@ -147,15 +147,15 @@ dir = "/tmp/greptimedb/wal" ## The size of the WAL segment file. 
## **It's only used when the provider is `raft_engine`**. -file_size = "256MB" +file_size = "128MB" ## The threshold of the WAL size to trigger a flush. ## **It's only used when the provider is `raft_engine`**. -purge_threshold = "4GB" +purge_threshold = "1GB" ## The interval to trigger a flush. ## **It's only used when the provider is `raft_engine`**. -purge_interval = "10m" +purge_interval = "1m" ## The read batch size. ## **It's only used when the provider is `raft_engine`**. diff --git a/src/common/wal/src/config/raft_engine.rs b/src/common/wal/src/config/raft_engine.rs index af5daa9d386d..cfefd0c758b7 100644 --- a/src/common/wal/src/config/raft_engine.rs +++ b/src/common/wal/src/config/raft_engine.rs @@ -49,9 +49,9 @@ impl Default for RaftEngineConfig { fn default() -> Self { Self { dir: None, - file_size: ReadableSize::mb(256), - purge_threshold: ReadableSize::gb(4), - purge_interval: Duration::from_secs(600), + file_size: ReadableSize::mb(128), + purge_threshold: ReadableSize::gb(1), + purge_interval: Duration::from_secs(60), read_batch_size: 128, sync_write: false, enable_log_recycle: true, diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index 9d7b81f3919b..4da65f0b21f5 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -886,9 +886,9 @@ with_metric_engine = true [wal] provider = "raft_engine" -file_size = "256MiB" -purge_threshold = "4GiB" -purge_interval = "10m" +file_size = "128MiB" +purge_threshold = "1GiB" +purge_interval = "1m" read_batch_size = 128 sync_write = false enable_log_recycle = true From a30d918df2bf4b57e65b59d3eef26765ad96f6b6 Mon Sep 17 00:00:00 2001 From: "Lei, HUANG" <6406592+v0y4g3r@users.noreply.github.com> Date: Wed, 11 Dec 2024 16:24:41 +0800 Subject: [PATCH 06/59] perf: avoid cache during compaction (#5135) * Revert "refactor: Avoid wrapping Option for CacheManagerRef (#4996)" This reverts commit 42bf7e99655bf842a08c657d1d601c0a8a9f41f2. 
* fix: memory usage during log ingestion * fix: fmt --- src/mito2/src/cache/write_cache.rs | 2 +- src/mito2/src/compaction.rs | 3 +- src/mito2/src/compaction/compactor.rs | 1 - src/mito2/src/engine.rs | 16 ++++++---- src/mito2/src/read/last_row.rs | 24 ++++++++++---- src/mito2/src/read/projection.rs | 24 +++++++------- src/mito2/src/read/range.rs | 9 +++--- src/mito2/src/read/scan_region.rs | 19 +++++++---- src/mito2/src/read/seq_scan.rs | 2 +- src/mito2/src/read/unordered_scan.rs | 2 +- src/mito2/src/sst/parquet.rs | 10 +++--- src/mito2/src/sst/parquet/reader.rs | 32 +++++++++---------- src/mito2/src/sst/parquet/row_group.rs | 44 +++++++++++++++----------- 13 files changed, 108 insertions(+), 80 deletions(-) diff --git a/src/mito2/src/cache/write_cache.rs b/src/mito2/src/cache/write_cache.rs index 4e2fe357fd09..8a431f22a63d 100644 --- a/src/mito2/src/cache/write_cache.rs +++ b/src/mito2/src/cache/write_cache.rs @@ -501,7 +501,7 @@ mod tests { // Read metadata from write cache let builder = ParquetReaderBuilder::new(data_home, handle.clone(), mock_store.clone()) - .cache(cache_manager.clone()); + .cache(Some(cache_manager.clone())); let reader = builder.build().await.unwrap(); // Check parquet metadata diff --git a/src/mito2/src/compaction.rs b/src/mito2/src/compaction.rs index 5f462f33a111..2b70f455d815 100644 --- a/src/mito2/src/compaction.rs +++ b/src/mito2/src/compaction.rs @@ -570,7 +570,6 @@ pub struct SerializedCompactionOutput { struct CompactionSstReaderBuilder<'a> { metadata: RegionMetadataRef, sst_layer: AccessLayerRef, - cache: CacheManagerRef, inputs: &'a [FileHandle], append_mode: bool, filter_deleted: bool, @@ -584,7 +583,7 @@ impl<'a> CompactionSstReaderBuilder<'a> { let mut scan_input = ScanInput::new(self.sst_layer, ProjectionMapper::all(&self.metadata)?) .with_files(self.inputs.to_vec()) .with_append_mode(self.append_mode) - .with_cache(self.cache) + .with_cache(None) .with_filter_deleted(self.filter_deleted) // We ignore file not found error during compaction. .with_ignore_file_not_found(true) diff --git a/src/mito2/src/compaction/compactor.rs b/src/mito2/src/compaction/compactor.rs index 792634b2e4a2..91ab34c961cf 100644 --- a/src/mito2/src/compaction/compactor.rs +++ b/src/mito2/src/compaction/compactor.rs @@ -296,7 +296,6 @@ impl Compactor for DefaultCompactor { let reader = CompactionSstReaderBuilder { metadata: region_metadata.clone(), sst_layer: sst_layer.clone(), - cache: cache_manager.clone(), inputs: &output.inputs, append_mode, filter_deleted: output.filter_deleted, diff --git a/src/mito2/src/engine.rs b/src/mito2/src/engine.rs index a518da32535d..9b912318e16b 100644 --- a/src/mito2/src/engine.rs +++ b/src/mito2/src/engine.rs @@ -424,12 +424,16 @@ impl EngineInner { // Get cache. 
let cache_manager = self.workers.cache_manager(); - let scan_region = - ScanRegion::new(version, region.access_layer.clone(), request, cache_manager) - .with_parallel_scan_channel_size(self.config.parallel_scan_channel_size) - .with_ignore_inverted_index(self.config.inverted_index.apply_on_query.disabled()) - .with_ignore_fulltext_index(self.config.fulltext_index.apply_on_query.disabled()) - .with_start_time(query_start); + let scan_region = ScanRegion::new( + version, + region.access_layer.clone(), + request, + Some(cache_manager), + ) + .with_parallel_scan_channel_size(self.config.parallel_scan_channel_size) + .with_ignore_inverted_index(self.config.inverted_index.apply_on_query.disabled()) + .with_ignore_fulltext_index(self.config.fulltext_index.apply_on_query.disabled()) + .with_start_time(query_start); Ok(scan_region) } diff --git a/src/mito2/src/read/last_row.rs b/src/mito2/src/read/last_row.rs index ee775a8ec2ba..79d035e03271 100644 --- a/src/mito2/src/read/last_row.rs +++ b/src/mito2/src/read/last_row.rs @@ -86,7 +86,7 @@ impl RowGroupLastRowCachedReader { pub(crate) fn new( file_id: FileId, row_group_idx: usize, - cache_manager: CacheManagerRef, + cache_manager: Option, row_group_reader: RowGroupReader, ) -> Self { let key = SelectorResultKey { @@ -95,6 +95,9 @@ impl RowGroupLastRowCachedReader { selector: TimeSeriesRowSelector::LastRow, }; + let Some(cache_manager) = cache_manager else { + return Self::new_miss(key, row_group_reader, None); + }; if let Some(value) = cache_manager.get_selector_result(&key) { let schema_matches = value.projection == row_group_reader @@ -105,10 +108,10 @@ impl RowGroupLastRowCachedReader { // Schema matches, use cache batches. Self::new_hit(value) } else { - Self::new_miss(key, row_group_reader, cache_manager) + Self::new_miss(key, row_group_reader, Some(cache_manager)) } } else { - Self::new_miss(key, row_group_reader, cache_manager) + Self::new_miss(key, row_group_reader, Some(cache_manager)) } } @@ -122,7 +125,7 @@ impl RowGroupLastRowCachedReader { fn new_miss( key: SelectorResultKey, row_group_reader: RowGroupReader, - cache_manager: CacheManagerRef, + cache_manager: Option, ) -> Self { selector_result_cache_miss(); Self::Miss(RowGroupLastRowReader::new( @@ -167,13 +170,17 @@ pub(crate) struct RowGroupLastRowReader { reader: RowGroupReader, selector: LastRowSelector, yielded_batches: Vec, - cache_manager: CacheManagerRef, + cache_manager: Option, /// Index buffer to take a new batch from the last row. take_index: UInt32Vector, } impl RowGroupLastRowReader { - fn new(key: SelectorResultKey, reader: RowGroupReader, cache_manager: CacheManagerRef) -> Self { + fn new( + key: SelectorResultKey, + reader: RowGroupReader, + cache_manager: Option, + ) -> Self { Self { key, reader, @@ -213,6 +220,9 @@ impl RowGroupLastRowReader { // we always expect that row groups yields batches. 
return; } + let Some(cache) = &self.cache_manager else { + return; + }; let value = Arc::new(SelectorResultValue { result: std::mem::take(&mut self.yielded_batches), projection: self @@ -222,7 +232,7 @@ impl RowGroupLastRowReader { .projection_indices() .to_vec(), }); - self.cache_manager.put_selector_result(self.key, value); + cache.put_selector_result(self.key, value); } } diff --git a/src/mito2/src/read/projection.rs b/src/mito2/src/read/projection.rs index 78866f0c1ba0..9ba5f6eccf1e 100644 --- a/src/mito2/src/read/projection.rs +++ b/src/mito2/src/read/projection.rs @@ -171,7 +171,7 @@ impl ProjectionMapper { pub(crate) fn convert( &self, batch: &Batch, - cache_manager: &CacheManager, + cache_manager: Option<&CacheManager>, ) -> common_recordbatch::error::Result { debug_assert_eq!(self.batch_fields.len(), batch.fields().len()); debug_assert!(self @@ -204,12 +204,15 @@ impl ProjectionMapper { match index { BatchIndex::Tag(idx) => { let value = &pk_values[*idx]; - let vector = repeated_vector_with_cache( - &column_schema.data_type, - value, - num_rows, - cache_manager, - )?; + let vector = match cache_manager { + Some(cache) => repeated_vector_with_cache( + &column_schema.data_type, + value, + num_rows, + cache, + )?, + None => new_repeated_vector(&column_schema.data_type, value, num_rows)?, + }; columns.push(vector); } BatchIndex::Timestamp => { @@ -357,7 +360,7 @@ mod tests { // With vector cache. let cache = CacheManager::builder().vector_cache_size(1024).build(); let batch = new_batch(0, &[1, 2], &[(3, 3), (4, 4)], 3); - let record_batch = mapper.convert(&batch, &cache).unwrap(); + let record_batch = mapper.convert(&batch, Some(&cache)).unwrap(); let expect = "\ +---------------------+----+----+----+----+ | ts | k0 | k1 | v0 | v1 | @@ -377,7 +380,7 @@ mod tests { assert!(cache .get_repeated_vector(&ConcreteDataType::int64_datatype(), &Value::Int64(3)) .is_none()); - let record_batch = mapper.convert(&batch, &cache).unwrap(); + let record_batch = mapper.convert(&batch, Some(&cache)).unwrap(); assert_eq!(expect, print_record_batch(record_batch)); } @@ -398,8 +401,7 @@ mod tests { ); let batch = new_batch(0, &[1, 2], &[(4, 4)], 3); - let cache = CacheManager::builder().vector_cache_size(1024).build(); - let record_batch = mapper.convert(&batch, &cache).unwrap(); + let record_batch = mapper.convert(&batch, None).unwrap(); let expect = "\ +----+----+ | v1 | k0 | diff --git a/src/mito2/src/read/range.rs b/src/mito2/src/read/range.rs index 554751830ffc..bdad5f8fef0c 100644 --- a/src/mito2/src/read/range.rs +++ b/src/mito2/src/read/range.rs @@ -112,7 +112,7 @@ impl RangeMeta { Self::push_unordered_file_ranges( input.memtables.len(), &input.files, - &input.cache_manager, + input.cache_manager.as_deref(), &mut ranges, ); @@ -203,15 +203,16 @@ impl RangeMeta { fn push_unordered_file_ranges( num_memtables: usize, files: &[FileHandle], - cache: &CacheManager, + cache: Option<&CacheManager>, ranges: &mut Vec, ) { // For append mode, we can parallelize reading row groups. for (i, file) in files.iter().enumerate() { let file_index = num_memtables + i; // Get parquet meta from the cache. - let parquet_meta = - cache.get_parquet_meta_data_from_mem_cache(file.region_id(), file.file_id()); + let parquet_meta = cache.and_then(|c| { + c.get_parquet_meta_data_from_mem_cache(file.region_id(), file.file_id()) + }); if let Some(parquet_meta) = parquet_meta { // Scans each row group. 
for row_group_index in 0..file.meta_ref().num_row_groups { diff --git a/src/mito2/src/read/scan_region.rs b/src/mito2/src/read/scan_region.rs index 471cc1a8e5d4..19324f119f3e 100644 --- a/src/mito2/src/read/scan_region.rs +++ b/src/mito2/src/read/scan_region.rs @@ -167,7 +167,7 @@ pub(crate) struct ScanRegion { /// Scan request. request: ScanRequest, /// Cache. - cache_manager: CacheManagerRef, + cache_manager: Option, /// Capacity of the channel to send data from parallel scan tasks to the main task. parallel_scan_channel_size: usize, /// Whether to ignore inverted index. @@ -184,7 +184,7 @@ impl ScanRegion { version: VersionRef, access_layer: AccessLayerRef, request: ScanRequest, - cache_manager: CacheManagerRef, + cache_manager: Option, ) -> ScanRegion { ScanRegion { version, @@ -401,12 +401,17 @@ impl ScanRegion { } let file_cache = || -> Option { - let write_cache = self.cache_manager.write_cache()?; + let cache_manager = self.cache_manager.as_ref()?; + let write_cache = cache_manager.write_cache()?; let file_cache = write_cache.file_cache(); Some(file_cache) }(); - let index_cache = self.cache_manager.index_cache().cloned(); + let index_cache = self + .cache_manager + .as_ref() + .and_then(|c| c.index_cache()) + .cloned(); InvertedIndexApplierBuilder::new( self.access_layer.region_dir().to_string(), @@ -477,7 +482,7 @@ pub(crate) struct ScanInput { /// Handles to SST files to scan. pub(crate) files: Vec, /// Cache. - pub(crate) cache_manager: CacheManagerRef, + pub(crate) cache_manager: Option, /// Ignores file not found error. ignore_file_not_found: bool, /// Capacity of the channel to send data from parallel scan tasks to the main task. @@ -508,7 +513,7 @@ impl ScanInput { predicate: None, memtables: Vec::new(), files: Vec::new(), - cache_manager: CacheManagerRef::default(), + cache_manager: None, ignore_file_not_found: false, parallel_scan_channel_size: DEFAULT_SCAN_CHANNEL_SIZE, inverted_index_applier: None, @@ -551,7 +556,7 @@ impl ScanInput { /// Sets cache for this query. #[must_use] - pub(crate) fn with_cache(mut self, cache: CacheManagerRef) -> Self { + pub(crate) fn with_cache(mut self, cache: Option) -> Self { self.cache_manager = cache; self } diff --git a/src/mito2/src/read/seq_scan.rs b/src/mito2/src/read/seq_scan.rs index d8732cb93df2..bdf3a7d6b8bb 100644 --- a/src/mito2/src/read/seq_scan.rs +++ b/src/mito2/src/read/seq_scan.rs @@ -257,7 +257,7 @@ impl SeqScan { .await .map_err(BoxedError::new) .context(ExternalSnafu)?; - let cache = &stream_ctx.input.cache_manager; + let cache = stream_ctx.input.cache_manager.as_deref(); let mut metrics = ScannerMetrics::default(); let mut fetch_start = Instant::now(); #[cfg(debug_assertions)] diff --git a/src/mito2/src/read/unordered_scan.rs b/src/mito2/src/read/unordered_scan.rs index 97db9b86592c..60e5ca5c7cdb 100644 --- a/src/mito2/src/read/unordered_scan.rs +++ b/src/mito2/src/read/unordered_scan.rs @@ -149,7 +149,7 @@ impl UnorderedScan { let stream = try_stream! { part_metrics.on_first_poll(); - let cache = &stream_ctx.input.cache_manager; + let cache = stream_ctx.input.cache_manager.as_deref(); let range_builder_list = Arc::new(RangeBuilderList::new( stream_ctx.input.num_memtables(), stream_ctx.input.num_files(), diff --git a/src/mito2/src/sst/parquet.rs b/src/mito2/src/sst/parquet.rs index c94ae600735f..ae51a0d37c29 100644 --- a/src/mito2/src/sst/parquet.rs +++ b/src/mito2/src/sst/parquet.rs @@ -195,11 +195,11 @@ mod tests { .unwrap(); // Enable page cache. 
- let cache = Arc::new( + let cache = Some(Arc::new( CacheManager::builder() .page_cache_size(64 * 1024 * 1024) .build(), - ); + )); let builder = ParquetReaderBuilder::new(FILE_DIR.to_string(), handle.clone(), object_store) .cache(cache.clone()); for _ in 0..3 { @@ -219,15 +219,15 @@ mod tests { // Doesn't have compressed page cached. let page_key = PageKey::new_compressed(metadata.region_id, handle.file_id(), 0, 0); - assert!(cache.get_pages(&page_key).is_none()); + assert!(cache.as_ref().unwrap().get_pages(&page_key).is_none()); // Cache 4 row groups. for i in 0..4 { let page_key = PageKey::new_uncompressed(metadata.region_id, handle.file_id(), i, 0); - assert!(cache.get_pages(&page_key).is_some()); + assert!(cache.as_ref().unwrap().get_pages(&page_key).is_some()); } let page_key = PageKey::new_uncompressed(metadata.region_id, handle.file_id(), 5, 0); - assert!(cache.get_pages(&page_key).is_none()); + assert!(cache.as_ref().unwrap().get_pages(&page_key).is_none()); } #[tokio::test] diff --git a/src/mito2/src/sst/parquet/reader.rs b/src/mito2/src/sst/parquet/reader.rs index cd219f47ccd6..b73026a7a6e3 100644 --- a/src/mito2/src/sst/parquet/reader.rs +++ b/src/mito2/src/sst/parquet/reader.rs @@ -82,7 +82,7 @@ pub struct ParquetReaderBuilder { /// can contain columns not in the parquet file. projection: Option>, /// Manager that caches SST data. - cache_manager: CacheManagerRef, + cache_manager: Option, /// Index appliers. inverted_index_applier: Option, fulltext_index_applier: Option, @@ -106,7 +106,7 @@ impl ParquetReaderBuilder { predicate: None, time_range: None, projection: None, - cache_manager: CacheManagerRef::default(), + cache_manager: None, inverted_index_applier: None, fulltext_index_applier: None, expected_metadata: None, @@ -138,7 +138,7 @@ impl ParquetReaderBuilder { /// Attaches the cache to the builder. #[must_use] - pub fn cache(mut self, cache: CacheManagerRef) -> ParquetReaderBuilder { + pub fn cache(mut self, cache: Option) -> ParquetReaderBuilder { self.cache_manager = cache; self } @@ -313,12 +313,10 @@ impl ParquetReaderBuilder { let region_id = self.file_handle.region_id(); let file_id = self.file_handle.file_id(); // Tries to get from global cache. - if let Some(metadata) = self - .cache_manager - .get_parquet_meta_data(region_id, file_id) - .await - { - return Ok(metadata); + if let Some(manager) = &self.cache_manager { + if let Some(metadata) = manager.get_parquet_meta_data(region_id, file_id).await { + return Ok(metadata); + } } // Cache miss, load metadata directly. @@ -326,11 +324,13 @@ impl ParquetReaderBuilder { let metadata = metadata_loader.load().await?; let metadata = Arc::new(metadata); // Cache the metadata. - self.cache_manager.put_parquet_meta_data( - self.file_handle.region_id(), - self.file_handle.file_id(), - metadata.clone(), - ); + if let Some(cache) = &self.cache_manager { + cache.put_parquet_meta_data( + self.file_handle.region_id(), + self.file_handle.file_id(), + metadata.clone(), + ); + } Ok(metadata) } @@ -846,7 +846,7 @@ pub(crate) struct RowGroupReaderBuilder { /// Field levels to read. field_levels: FieldLevels, /// Cache. 
- cache_manager: CacheManagerRef, + cache_manager: Option, } impl RowGroupReaderBuilder { @@ -864,7 +864,7 @@ impl RowGroupReaderBuilder { &self.parquet_meta } - pub(crate) fn cache_manager(&self) -> &CacheManagerRef { + pub(crate) fn cache_manager(&self) -> &Option { &self.cache_manager } diff --git a/src/mito2/src/sst/parquet/row_group.rs b/src/mito2/src/sst/parquet/row_group.rs index dd572d8863f8..73382c06d9b3 100644 --- a/src/mito2/src/sst/parquet/row_group.rs +++ b/src/mito2/src/sst/parquet/row_group.rs @@ -48,7 +48,7 @@ pub struct InMemoryRowGroup<'a> { region_id: RegionId, file_id: FileId, row_group_idx: usize, - cache_manager: CacheManagerRef, + cache_manager: Option, /// Row group level cached pages for each column. /// /// These pages are uncompressed pages of a row group. @@ -69,7 +69,7 @@ impl<'a> InMemoryRowGroup<'a> { file_id: FileId, parquet_meta: &'a ParquetMetaData, row_group_idx: usize, - cache_manager: CacheManagerRef, + cache_manager: Option, file_path: &'a str, object_store: ObjectStore, ) -> Self { @@ -208,18 +208,19 @@ impl<'a> InMemoryRowGroup<'a> { }; let column = self.metadata.column(idx); - - if !cache_uncompressed_pages(column) { - // For columns that have multiple uncompressed pages, we only cache the compressed page - // to save memory. - let page_key = PageKey::new_compressed( - self.region_id, - self.file_id, - self.row_group_idx, - idx, - ); - self.cache_manager - .put_pages(page_key, Arc::new(PageValue::new_compressed(data.clone()))); + if let Some(cache) = &self.cache_manager { + if !cache_uncompressed_pages(column) { + // For columns that have multiple uncompressed pages, we only cache the compressed page + // to save memory. + let page_key = PageKey::new_compressed( + self.region_id, + self.file_id, + self.row_group_idx, + idx, + ); + cache + .put_pages(page_key, Arc::new(PageValue::new_compressed(data.clone()))); + } } *chunk = Some(Arc::new(ColumnChunkData::Dense { @@ -241,6 +242,9 @@ impl<'a> InMemoryRowGroup<'a> { .enumerate() .filter(|(idx, chunk)| chunk.is_none() && projection.leaf_included(*idx)) .for_each(|(idx, chunk)| { + let Some(cache) = &self.cache_manager else { + return; + }; let column = self.metadata.column(idx); if cache_uncompressed_pages(column) { // Fetches uncompressed pages for the row group. @@ -250,7 +254,7 @@ impl<'a> InMemoryRowGroup<'a> { self.row_group_idx, idx, ); - self.column_uncompressed_pages[idx] = self.cache_manager.get_pages(&page_key); + self.column_uncompressed_pages[idx] = cache.get_pages(&page_key); } else { // Fetches the compressed page from the cache. let page_key = PageKey::new_compressed( @@ -260,7 +264,7 @@ impl<'a> InMemoryRowGroup<'a> { idx, ); - *chunk = self.cache_manager.get_pages(&page_key).map(|page_value| { + *chunk = cache.get_pages(&page_key).map(|page_value| { Arc::new(ColumnChunkData::Dense { offset: column.byte_range().0 as usize, data: page_value.compressed.clone(), @@ -296,7 +300,7 @@ impl<'a> InMemoryRowGroup<'a> { key: IndexKey, ranges: &[Range], ) -> Option> { - if let Some(cache) = self.cache_manager.write_cache() { + if let Some(cache) = self.cache_manager.as_ref()?.write_cache() { return cache.file_cache().read_ranges(key, ranges).await; } None @@ -327,6 +331,10 @@ impl<'a> InMemoryRowGroup<'a> { } }; + let Some(cache) = &self.cache_manager else { + return Ok(Box::new(page_reader)); + }; + let column = self.metadata.column(i); if cache_uncompressed_pages(column) { // This column use row group level page cache. 
@@ -335,7 +343,7 @@ impl<'a> InMemoryRowGroup<'a> { let page_value = Arc::new(PageValue::new_row_group(pages)); let page_key = PageKey::new_uncompressed(self.region_id, self.file_id, self.row_group_idx, i); - self.cache_manager.put_pages(page_key, page_value.clone()); + cache.put_pages(page_key, page_value.clone()); return Ok(Box::new(RowGroupCachedReader::new(&page_value.row_group))); } From e1e39993f7847821da113b0102357ec6b07ec0f0 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Wed, 11 Dec 2024 17:25:56 +0800 Subject: [PATCH 07/59] feat(vector): add scalar add function (#5119) * refactor: extract implicit conversion helper functions of vector Signed-off-by: Zhenchi * feat(vector): add scalar add function Signed-off-by: Zhenchi * fix fmt Signed-off-by: Zhenchi --------- Signed-off-by: Zhenchi --- src/common/function/src/scalars/vector.rs | 4 + .../function/src/scalars/vector/impl_conv.rs | 1 - .../function/src/scalars/vector/scalar_add.rs | 173 ++++++++++++++++++ .../function/vector/vector_scalar.result | 48 +++++ .../common/function/vector/vector_scalar.sql | 11 ++ 5 files changed, 236 insertions(+), 1 deletion(-) create mode 100644 src/common/function/src/scalars/vector/scalar_add.rs create mode 100644 tests/cases/standalone/common/function/vector/vector_scalar.result create mode 100644 tests/cases/standalone/common/function/vector/vector_scalar.sql diff --git a/src/common/function/src/scalars/vector.rs b/src/common/function/src/scalars/vector.rs index 7c8cf5550e25..0c0428ce9a45 100644 --- a/src/common/function/src/scalars/vector.rs +++ b/src/common/function/src/scalars/vector.rs @@ -15,6 +15,7 @@ mod convert; mod distance; pub(crate) mod impl_conv; +mod scalar_add; use std::sync::Arc; @@ -32,5 +33,8 @@ impl VectorFunction { registry.register(Arc::new(distance::CosDistanceFunction)); registry.register(Arc::new(distance::DotProductFunction)); registry.register(Arc::new(distance::L2SqDistanceFunction)); + + // scalar calculation + registry.register(Arc::new(scalar_add::ScalarAddFunction)); } } diff --git a/src/common/function/src/scalars/vector/impl_conv.rs b/src/common/function/src/scalars/vector/impl_conv.rs index 903bfb2a0336..70a142c2906b 100644 --- a/src/common/function/src/scalars/vector/impl_conv.rs +++ b/src/common/function/src/scalars/vector/impl_conv.rs @@ -109,7 +109,6 @@ pub fn parse_veclit_from_strlit(s: &str) -> Result> { }) } -#[allow(unused)] /// Convert a vector literal to a binary literal. pub fn veclit_to_binlit(vec: &[f32]) -> Vec { if cfg!(target_endian = "little") { diff --git a/src/common/function/src/scalars/vector/scalar_add.rs b/src/common/function/src/scalars/vector/scalar_add.rs new file mode 100644 index 000000000000..ef016eff4b47 --- /dev/null +++ b/src/common/function/src/scalars/vector/scalar_add.rs @@ -0,0 +1,173 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::borrow::Cow; +use std::fmt::Display; + +use common_query::error::{InvalidFuncArgsSnafu, Result}; +use common_query::prelude::Signature; +use datatypes::prelude::ConcreteDataType; +use datatypes::scalars::ScalarVectorBuilder; +use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef}; +use nalgebra::DVectorView; +use snafu::ensure; + +use crate::function::{Function, FunctionContext}; +use crate::helper; +use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit}; + +const NAME: &str = "vec_scalar_add"; + +/// Adds a scalar to each element of a vector. +/// +/// # Example +/// +/// ```sql +/// SELECT vec_to_string(vec_scalar_add(1, "[1, 2, 3]")) as result; +/// +/// +---------+ +/// | result | +/// +---------+ +/// | [2,3,4] | +/// +---------+ +/// +/// -- Negative scalar to simulate subtraction +/// SELECT vec_to_string(vec_scalar_add(-1, "[1, 2, 3]")) as result; +/// +/// +---------+ +/// | result | +/// +---------+ +/// | [0,1,2] | +/// +---------+ +/// ``` +#[derive(Debug, Clone, Default)] +pub struct ScalarAddFunction; + +impl Function for ScalarAddFunction { + fn name(&self) -> &str { + NAME + } + + fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result { + Ok(ConcreteDataType::binary_datatype()) + } + + fn signature(&self) -> Signature { + helper::one_of_sigs2( + vec![ConcreteDataType::float64_datatype()], + vec![ + ConcreteDataType::string_datatype(), + ConcreteDataType::binary_datatype(), + ], + ) + } + + fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result { + ensure!( + columns.len() == 2, + InvalidFuncArgsSnafu { + err_msg: format!( + "The length of the args is not correct, expect exactly two, have: {}", + columns.len() + ), + } + ); + let arg0 = &columns[0]; + let arg1 = &columns[1]; + + let len = arg0.len(); + let mut result = BinaryVectorBuilder::with_capacity(len); + if len == 0 { + return Ok(result.to_vector()); + } + + let arg1_const = as_veclit_if_const(arg1)?; + + for i in 0..len { + let arg0 = arg0.get(i).as_f64_lossy(); + let Some(arg0) = arg0 else { + result.push_null(); + continue; + }; + + let arg1 = match arg1_const.as_ref() { + Some(arg1) => Some(Cow::Borrowed(arg1.as_ref())), + None => as_veclit(arg1.get_ref(i))?, + }; + let Some(arg1) = arg1 else { + result.push_null(); + continue; + }; + + let vec = DVectorView::from_slice(&arg1, arg1.len()); + let vec_res = vec.add_scalar(arg0 as _); + + let veclit = vec_res.as_slice(); + let binlit = veclit_to_binlit(veclit); + result.push(Some(&binlit)); + } + + Ok(result.to_vector()) + } +} + +impl Display for ScalarAddFunction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", NAME.to_ascii_uppercase()) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use datatypes::vectors::{Float32Vector, StringVector}; + + use super::*; + + #[test] + fn test_scalar_add() { + let func = ScalarAddFunction; + + let input0 = Arc::new(Float32Vector::from(vec![ + Some(1.0), + Some(-1.0), + None, + Some(3.0), + ])); + let input1 = Arc::new(StringVector::from(vec![ + Some("[1.0,2.0,3.0]".to_string()), + Some("[4.0,5.0,6.0]".to_string()), + Some("[7.0,8.0,9.0]".to_string()), + None, + ])); + + let result = func + .eval(FunctionContext::default(), &[input0, input1]) + .unwrap(); + + let result = result.as_ref(); + assert_eq!(result.len(), 4); + assert_eq!( + result.get_ref(0).as_binary().unwrap(), + Some(veclit_to_binlit(&[2.0, 3.0, 4.0]).as_slice()) + ); + assert_eq!( + 
result.get_ref(1).as_binary().unwrap(), + Some(veclit_to_binlit(&[3.0, 4.0, 5.0]).as_slice()) + ); + assert!(result.get_ref(2).is_null()); + assert!(result.get_ref(3).is_null()); + } +} diff --git a/tests/cases/standalone/common/function/vector/vector_scalar.result b/tests/cases/standalone/common/function/vector/vector_scalar.result new file mode 100644 index 000000000000..5750a0adfdb8 --- /dev/null +++ b/tests/cases/standalone/common/function/vector/vector_scalar.result @@ -0,0 +1,48 @@ +SELECT vec_to_string(vec_scalar_add(1.0, '[1.0, 2.0]')); + ++--------------------------------------------------------------+ +| vec_to_string(vec_scalar_add(Float64(1),Utf8("[1.0, 2.0]"))) | ++--------------------------------------------------------------+ +| [2,3] | ++--------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_add(-1.0, '[1.0, 2.0]')); + ++---------------------------------------------------------------+ +| vec_to_string(vec_scalar_add(Float64(-1),Utf8("[1.0, 2.0]"))) | ++---------------------------------------------------------------+ +| [0,1] | ++---------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_add(1.0, parse_vec('[1.0, 2.0]'))); + ++-------------------------------------------------------------------------+ +| vec_to_string(vec_scalar_add(Float64(1),parse_vec(Utf8("[1.0, 2.0]")))) | ++-------------------------------------------------------------------------+ +| [2,3] | ++-------------------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_add(-1.0, parse_vec('[1.0, 2.0]'))); + ++--------------------------------------------------------------------------+ +| vec_to_string(vec_scalar_add(Float64(-1),parse_vec(Utf8("[1.0, 2.0]")))) | ++--------------------------------------------------------------------------+ +| [0,1] | ++--------------------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_add(1, '[1.0, 2.0]')); + ++------------------------------------------------------------+ +| vec_to_string(vec_scalar_add(Int64(1),Utf8("[1.0, 2.0]"))) | ++------------------------------------------------------------+ +| [2,3] | ++------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_add(-1, '[1.0, 2.0]')); + ++-------------------------------------------------------------+ +| vec_to_string(vec_scalar_add(Int64(-1),Utf8("[1.0, 2.0]"))) | ++-------------------------------------------------------------+ +| [0,1] | ++-------------------------------------------------------------+ + diff --git a/tests/cases/standalone/common/function/vector/vector_scalar.sql b/tests/cases/standalone/common/function/vector/vector_scalar.sql new file mode 100644 index 000000000000..e438ac6a40ba --- /dev/null +++ b/tests/cases/standalone/common/function/vector/vector_scalar.sql @@ -0,0 +1,11 @@ +SELECT vec_to_string(vec_scalar_add(1.0, '[1.0, 2.0]')); + +SELECT vec_to_string(vec_scalar_add(-1.0, '[1.0, 2.0]')); + +SELECT vec_to_string(vec_scalar_add(1.0, parse_vec('[1.0, 2.0]'))); + +SELECT vec_to_string(vec_scalar_add(-1.0, parse_vec('[1.0, 2.0]'))); + +SELECT vec_to_string(vec_scalar_add(1, '[1.0, 2.0]')); + +SELECT vec_to_string(vec_scalar_add(-1, '[1.0, 2.0]')); From 1a8e77a480cdd0b4d625c919b3594b27ddf76207 Mon Sep 17 00:00:00 2001 From: Yohan Wal Date: Wed, 11 Dec 2024 17:28:13 +0800 Subject: [PATCH 08/59] test: part of parser test migrated from duckdb (#5125) * test: update test * fix: fix test --- 
.../standalone/common/parser/parser.result | 50 +++++++++++++++++++ .../cases/standalone/common/parser/parser.sql | 35 +++++++++++++ 2 files changed, 85 insertions(+) create mode 100644 tests/cases/standalone/common/parser/parser.result create mode 100644 tests/cases/standalone/common/parser/parser.sql diff --git a/tests/cases/standalone/common/parser/parser.result b/tests/cases/standalone/common/parser/parser.result new file mode 100644 index 000000000000..7e6dce85b79b --- /dev/null +++ b/tests/cases/standalone/common/parser/parser.result @@ -0,0 +1,50 @@ +-- columns aliases, from: +-- https://github.com/duckdb/duckdb/blob/9196dd9b0a163e6c8aada26218803d04be30c562/test/sql/parser/columns_aliases.test +CREATE TABLE integers (ts TIMESTAMP TIME INDEX, i INT, j INT); + +Affected Rows: 0 + +INSERT INTO integers SELECT 0::TIMESTAMP ts, 42 i, 84 j UNION ALL SELECT 1::TIMESTAMP, 13, 14; + +Affected Rows: 2 + +SELECT i, j FROM (SELECT COLUMNS(*)::VARCHAR FROM integers); + +Error: 3000(PlanQuery), Failed to plan SQL: Error during planning: Invalid function 'columns'. +Did you mean 'COUNT'? + +SELECT i, j FROM (SELECT * FROM integers); + ++----+----+ +| i | j | ++----+----+ +| 42 | 84 | +| 13 | 14 | ++----+----+ + +SELECT min_i, min_j, max_i, max_j FROM (SELECT MIN(i) AS "min_i", MAX(i) AS "max_i", MIN(j) AS "min_j", MAX(j) AS "max_j" FROM integers); + ++-------+-------+-------+-------+ +| min_i | min_j | max_i | max_j | ++-------+-------+-------+-------+ +| 13 | 14 | 42 | 84 | ++-------+-------+-------+-------+ + +DROP TABLE integers; + +Affected Rows: 0 + +-- skipped, unsupported feature: digit separators +-- SELECT 1_000_000; +-- skipped, unsupported feature: division operator precedence +-- SELECT 6 + 1 // 2; +-- expression depth, from: +-- https://github.com/duckdb/duckdb/blob/9196dd9b0a163e6c8aada26218803d04be30c562/test/sql/parser/expression_depth_limit.test +SELECT (1+(1+(1+(1+(1+(1+(1+1))))))); + ++---------------------------------------------------------------------------------------+ +| Int64(1) + Int64(1) + Int64(1) + Int64(1) + Int64(1) + Int64(1) + Int64(1) + Int64(1) | ++---------------------------------------------------------------------------------------+ +| 8 | ++---------------------------------------------------------------------------------------+ + diff --git a/tests/cases/standalone/common/parser/parser.sql b/tests/cases/standalone/common/parser/parser.sql new file mode 100644 index 000000000000..bd7dcbf400c3 --- /dev/null +++ b/tests/cases/standalone/common/parser/parser.sql @@ -0,0 +1,35 @@ + +-- columns aliases, from: +-- https://github.com/duckdb/duckdb/blob/9196dd9b0a163e6c8aada26218803d04be30c562/test/sql/parser/columns_aliases.test + +CREATE TABLE integers (ts TIMESTAMP TIME INDEX, i INT, j INT); + +INSERT INTO integers SELECT 0::TIMESTAMP ts, 42 i, 84 j UNION ALL SELECT 1::TIMESTAMP, 13, 14; + +SELECT i, j FROM (SELECT COLUMNS(*)::VARCHAR FROM integers); + +SELECT i, j FROM (SELECT * FROM integers); + +SELECT min_i, min_j, max_i, max_j FROM (SELECT MIN(i) AS "min_i", MAX(i) AS "max_i", MIN(j) AS "min_j", MAX(j) AS "max_j" FROM integers); + +DROP TABLE integers; + +-- skipped, unsupported feature: digit separators +-- SELECT 1_000_000; + +-- skipped, unsupported feature: division operator precedence +-- SELECT 6 + 1 // 2; + +-- expression depth, from: +-- https://github.com/duckdb/duckdb/blob/9196dd9b0a163e6c8aada26218803d04be30c562/test/sql/parser/expression_depth_limit.test +SELECT (1+(1+(1+(1+(1+(1+(1+1))))))); + +-- skipped, unsupported feature: dollar quotes +-- 
SELECT $$$$ = ''; + +-- skipped, unsupported feature: from_first, see also: +-- https://github.com/GreptimeTeam/greptimedb/issues/5012 +-- FROM integers; + +-- skipped, unsupported feature: function chaining +-- SELECT "abcd".upper().lower(); From 9da2e17d0e0a6302e243f8fefe1c636b0497d45d Mon Sep 17 00:00:00 2001 From: ZonaHe Date: Wed, 11 Dec 2024 20:47:59 +0800 Subject: [PATCH 09/59] feat: update dashboard to v0.7.2 (#5141) Co-authored-by: sunchanglong --- src/servers/dashboard/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/servers/dashboard/VERSION b/src/servers/dashboard/VERSION index 63f2359f6421..2c0a9c7b7754 100644 --- a/src/servers/dashboard/VERSION +++ b/src/servers/dashboard/VERSION @@ -1 +1 @@ -v0.7.1 +v0.7.2 From 60f8dbf7f01dc08e43b1145f7444ff467d741e38 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Wed, 11 Dec 2024 21:33:54 +0800 Subject: [PATCH 10/59] feat: implement `v1/sql/parse` endpoint to parse GreptimeDB's SQL dialect (#5144) * derive ser/de Signed-off-by: Ruihang Xia * impl method Signed-off-by: Ruihang Xia * fix typo Signed-off-by: Ruihang Xia * remove deserialize Signed-off-by: Ruihang Xia --------- Signed-off-by: Ruihang Xia --- Cargo.lock | 2 ++ Cargo.toml | 1 + src/servers/src/error.rs | 10 ++++++++- src/servers/src/http.rs | 4 ++++ src/servers/src/http/handler.rs | 28 +++++++++++++++++++++++- src/sql/Cargo.toml | 1 + src/sql/src/statements/admin.rs | 3 ++- src/sql/src/statements/alter.rs | 11 +++++----- src/sql/src/statements/copy.rs | 11 +++++----- src/sql/src/statements/create.rs | 21 +++++++++--------- src/sql/src/statements/cursor.rs | 7 +++--- src/sql/src/statements/delete.rs | 3 ++- src/sql/src/statements/describe.rs | 3 ++- src/sql/src/statements/drop.rs | 9 ++++---- src/sql/src/statements/explain.rs | 3 ++- src/sql/src/statements/insert.rs | 3 ++- src/sql/src/statements/option_map.rs | 4 +++- src/sql/src/statements/query.rs | 3 ++- src/sql/src/statements/set_variables.rs | 3 ++- src/sql/src/statements/show.rs | 29 +++++++++++++------------ src/sql/src/statements/statement.rs | 7 ++++-- src/sql/src/statements/tql.rs | 9 ++++---- src/sql/src/statements/truncate.rs | 3 ++- tests-integration/tests/http.rs | 8 +++++++ 24 files changed, 128 insertions(+), 58 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 628c6a582418..311caafcb2fe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -11295,6 +11295,7 @@ dependencies = [ "jsonb", "lazy_static", "regex", + "serde", "serde_json", "snafu 0.8.5", "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", @@ -11371,6 +11372,7 @@ dependencies = [ "lazy_static", "log", "regex", + "serde", "sqlparser 0.45.0 (registry+https://github.com/rust-lang/crates.io-index)", "sqlparser_derive 0.2.2 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", ] diff --git a/Cargo.toml b/Cargo.toml index d1d360850e70..990bc71a907b 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -180,6 +180,7 @@ sysinfo = "0.30" # on branch v0.44.x sqlparser = { git = "https://github.com/GreptimeTeam/sqlparser-rs.git", rev = "54a267ac89c09b11c0c88934690530807185d3e7", features = [ "visitor", + "serde", ] } strum = { version = "0.25", features = ["derive"] } tempfile = "3" diff --git a/src/servers/src/error.rs b/src/servers/src/error.rs index 6682a1c78967..071de93683cc 100644 --- a/src/servers/src/error.rs +++ b/src/servers/src/error.rs @@ -189,6 +189,13 @@ pub enum Error { location: Location, }, + #[snafu(display("Failed 
to parse query"))] + FailedToParseQuery { + #[snafu(implicit)] + location: Location, + source: sql::error::Error, + }, + #[snafu(display("Failed to parse InfluxDB line protocol"))] InfluxdbLineProtocol { #[snafu(implicit)] @@ -651,7 +658,8 @@ impl ErrorExt for Error { | OpenTelemetryLog { .. } | UnsupportedJsonDataTypeForTag { .. } | InvalidTableName { .. } - | PrepareStatementNotFound { .. } => StatusCode::InvalidArguments, + | PrepareStatementNotFound { .. } + | FailedToParseQuery { .. } => StatusCode::InvalidArguments, Catalog { source, .. } => source.status_code(), RowWriter { source, .. } => source.status_code(), diff --git a/src/servers/src/http.rs b/src/servers/src/http.rs index d8d07ed31fa0..1107870c9a25 100644 --- a/src/servers/src/http.rs +++ b/src/servers/src/http.rs @@ -755,6 +755,10 @@ impl HttpServer { fn route_sql(api_state: ApiState) -> Router { Router::new() .route("/sql", routing::get(handler::sql).post(handler::sql)) + .route( + "/sql/parse", + routing::get(handler::sql_parse).post(handler::sql_parse), + ) .route( "/promql", routing::get(handler::promql).post(handler::promql), diff --git a/src/servers/src/http/handler.rs b/src/servers/src/http/handler.rs index 15a1a0e16c73..153b824d6ef1 100644 --- a/src/servers/src/http/handler.rs +++ b/src/servers/src/http/handler.rs @@ -30,8 +30,13 @@ use query::parser::{PromQuery, DEFAULT_LOOKBACK_STRING}; use serde::{Deserialize, Serialize}; use serde_json::Value; use session::context::{Channel, QueryContext, QueryContextRef}; +use snafu::ResultExt; +use sql::dialect::GreptimeDbDialect; +use sql::parser::{ParseOptions, ParserContext}; +use sql::statements::statement::Statement; use super::header::collect_plan_metrics; +use crate::error::{FailedToParseQuerySnafu, InvalidQuerySnafu, Result}; use crate::http::result::arrow_result::ArrowResponse; use crate::http::result::csv_result::CsvResponse; use crate::http::result::error_result::ErrorResponse; @@ -146,10 +151,31 @@ pub async fn sql( resp.with_execution_time(start.elapsed().as_millis() as u64) } +/// Handler to parse sql +#[axum_macros::debug_handler] +#[tracing::instrument(skip_all, fields(protocol = "http", request_type = "sql"))] +pub async fn sql_parse( + Query(query_params): Query, + Form(form_params): Form, +) -> Result>> { + let Some(sql) = query_params.sql.or(form_params.sql) else { + return InvalidQuerySnafu { + reason: "sql parameter is required.", + } + .fail(); + }; + + let stmts = + ParserContext::create_with_dialect(&sql, &GreptimeDbDialect {}, ParseOptions::default()) + .context(FailedToParseQuerySnafu)?; + + Ok(stmts.into()) +} + /// Create a response from query result pub async fn from_output( outputs: Vec>, -) -> Result<(Vec, HashMap), ErrorResponse> { +) -> std::result::Result<(Vec, HashMap), ErrorResponse> { // TODO(sunng87): this api response structure cannot represent error well. 
// It hides successful execution results from error response let mut results = Vec::with_capacity(outputs.len()); diff --git a/src/sql/Cargo.toml b/src/sql/Cargo.toml index e3340a8f6c90..3cb81d6dd494 100644 --- a/src/sql/Cargo.toml +++ b/src/sql/Cargo.toml @@ -30,6 +30,7 @@ itertools.workspace = true jsonb.workspace = true lazy_static.workspace = true regex.workspace = true +serde.workspace = true serde_json.workspace = true snafu.workspace = true sqlparser.workspace = true diff --git a/src/sql/src/statements/admin.rs b/src/sql/src/statements/admin.rs index bbe805a4c163..ed068ea47510 100644 --- a/src/sql/src/statements/admin.rs +++ b/src/sql/src/statements/admin.rs @@ -14,12 +14,13 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::Function; /// `ADMIN` statement to execute some administration commands. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum Admin { /// Run a admin function. Func(Function), diff --git a/src/sql/src/statements/alter.rs b/src/sql/src/statements/alter.rs index cf59257e8931..174bdbbdc310 100644 --- a/src/sql/src/statements/alter.rs +++ b/src/sql/src/statements/alter.rs @@ -18,10 +18,11 @@ use api::v1; use common_query::AddColumnLocation; use datatypes::schema::FulltextOptions; use itertools::Itertools; +use serde::Serialize; use sqlparser::ast::{ColumnDef, DataType, Ident, ObjectName, TableConstraint}; use sqlparser_derive::{Visit, VisitMut}; -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct AlterTable { pub table_name: ObjectName, pub alter_operation: AlterTableOperation, @@ -56,7 +57,7 @@ impl Display for AlterTable { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum AlterTableOperation { /// `ADD ` AddConstraint(TableConstraint), @@ -151,7 +152,7 @@ impl Display for AlterTableOperation { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct KeyValueOption { pub key: String, pub value: String, @@ -166,7 +167,7 @@ impl From for v1::Option { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct AlterDatabase { pub database_name: ObjectName, pub alter_operation: AlterDatabaseOperation, @@ -197,7 +198,7 @@ impl Display for AlterDatabase { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum AlterDatabaseOperation { SetDatabaseOption { options: Vec }, UnsetDatabaseOption { keys: Vec }, diff --git a/src/sql/src/statements/copy.rs b/src/sql/src/statements/copy.rs index c68b9d8c0321..436d86d3abaf 100644 --- a/src/sql/src/statements/copy.rs +++ b/src/sql/src/statements/copy.rs @@ -14,12 +14,13 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser::ast::ObjectName; use sqlparser_derive::{Visit, VisitMut}; use crate::statements::OptionMap; -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum Copy { CopyTable(CopyTable), CopyDatabase(CopyDatabase), @@ -34,7 +35,7 @@ impl Display for Copy { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum 
CopyTable { To(CopyTableArgument), From(CopyTableArgument), @@ -65,7 +66,7 @@ impl Display for CopyTable { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum CopyDatabase { To(CopyDatabaseArgument), From(CopyDatabaseArgument), @@ -96,7 +97,7 @@ impl Display for CopyDatabase { } } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct CopyDatabaseArgument { pub database_name: ObjectName, pub with: OptionMap, @@ -104,7 +105,7 @@ pub struct CopyDatabaseArgument { pub location: String, } -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct CopyTableArgument { pub table_name: ObjectName, pub with: OptionMap, diff --git a/src/sql/src/statements/create.rs b/src/sql/src/statements/create.rs index 20ed7b555965..e4ea46572e5f 100644 --- a/src/sql/src/statements/create.rs +++ b/src/sql/src/statements/create.rs @@ -18,6 +18,7 @@ use std::fmt::{Display, Formatter}; use common_catalog::consts::FILE_ENGINE; use datatypes::schema::FulltextOptions; use itertools::Itertools; +use serde::Serialize; use snafu::ResultExt; use sqlparser::ast::{ColumnOptionDef, DataType, Expr, Query}; use sqlparser_derive::{Visit, VisitMut}; @@ -58,7 +59,7 @@ fn format_table_constraint(constraints: &[TableConstraint]) -> String { } /// Table constraint for create table statement. -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub enum TableConstraint { /// Primary key constraint. PrimaryKey { columns: Vec }, @@ -84,7 +85,7 @@ impl Display for TableConstraint { } } -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct CreateTable { /// Create if not exists pub if_not_exists: bool, @@ -100,7 +101,7 @@ pub struct CreateTable { } /// Column definition in `CREATE TABLE` statement. -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct Column { /// `ColumnDef` from `sqlparser::ast` pub column_def: ColumnDef, @@ -109,7 +110,7 @@ pub struct Column { } /// Column extensions for greptimedb dialect. -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Default)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Default, Serialize)] pub struct ColumnExtensions { /// Fulltext options. 
pub fulltext_options: Option, @@ -172,7 +173,7 @@ impl ColumnExtensions { } } -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct Partitions { pub column_list: Vec, pub exprs: Vec, @@ -244,7 +245,7 @@ impl Display for CreateTable { } } -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct CreateDatabase { pub name: ObjectName, /// Create if not exists @@ -278,7 +279,7 @@ impl Display for CreateDatabase { } } -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct CreateExternalTable { /// Table name pub name: ObjectName, @@ -309,7 +310,7 @@ impl Display for CreateExternalTable { } } -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct CreateTableLike { /// Table name pub table_name: ObjectName, @@ -325,7 +326,7 @@ impl Display for CreateTableLike { } } -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct CreateFlow { /// Flow name pub flow_name: ObjectName, @@ -367,7 +368,7 @@ impl Display for CreateFlow { } /// Create SQL view statement. -#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] pub struct CreateView { /// View name pub name: ObjectName, diff --git a/src/sql/src/statements/cursor.rs b/src/sql/src/statements/cursor.rs index 72ef4cdcae98..4381cc5e7be5 100644 --- a/src/sql/src/statements/cursor.rs +++ b/src/sql/src/statements/cursor.rs @@ -14,6 +14,7 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser::ast::ObjectName; use sqlparser_derive::{Visit, VisitMut}; @@ -22,7 +23,7 @@ use super::query::Query; /// Represents a DECLARE CURSOR statement /// /// This statement will carry a SQL query -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct DeclareCursor { pub cursor_name: ObjectName, pub query: Box, @@ -35,7 +36,7 @@ impl Display for DeclareCursor { } /// Represents a FETCH FROM cursor statement -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct FetchCursor { pub cursor_name: ObjectName, pub fetch_size: u64, @@ -48,7 +49,7 @@ impl Display for FetchCursor { } /// Represents a CLOSE cursor statement -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct CloseCursor { pub cursor_name: ObjectName, } diff --git a/src/sql/src/statements/delete.rs b/src/sql/src/statements/delete.rs index 4346610b7d19..dc8f5d69014e 100644 --- a/src/sql/src/statements/delete.rs +++ b/src/sql/src/statements/delete.rs @@ -12,10 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. 
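Taken together, these hunks apply one pattern across the whole `sql` crate: each AST type gains `#[derive(Serialize)]` so a parsed `Statement` can be rendered as JSON by the new `v1/sql/parse` handler, while `OptionMap` keeps its secrets out of the output with `#[serde(skip_serializing)]`. A small stand-alone sketch of that pattern, assuming only `serde` (with the `derive` feature) and `serde_json`; the `Options` and `Statement` types below are stand-ins, not the real crate definitions:

use std::collections::BTreeMap;

use serde::Serialize;

#[derive(Debug, Serialize)]
struct Options {
    options: BTreeMap<String, String>,
    // Never serialized, mirroring how `OptionMap` hides its secrets map.
    #[serde(skip_serializing)]
    secrets: BTreeMap<String, String>,
}

#[derive(Debug, Serialize)]
enum Statement {
    DescribeTable { name: String },
    Copy { location: String, with: Options },
}

fn main() {
    let stmts = vec![
        Statement::DescribeTable { name: "t".into() },
        Statement::Copy {
            location: "s3://bucket/path".into(),
            with: Options {
                options: BTreeMap::from([("format".into(), "parquet".into())]),
                secrets: BTreeMap::from([("secret_access_key".into(), "redacted".into())]),
            },
        },
    ];
    // Serde's default externally tagged representation prints each variant as
    // {"VariantName":{...}}, e.g. [{"DescribeTable":{"name":"t"}}, ...], with the
    // secrets map omitted entirely.
    println!("{}", serde_json::to_string(&stmts).unwrap());
}

This externally tagged shape is also what the integration test added later in this patch asserts for `DescribeTable`.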
+use serde::Serialize; use sqlparser::ast::Statement; use sqlparser_derive::{Visit, VisitMut}; -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct Delete { pub inner: Statement, } diff --git a/src/sql/src/statements/describe.rs b/src/sql/src/statements/describe.rs index 743f2b0123c2..1a7bba24e5d3 100644 --- a/src/sql/src/statements/describe.rs +++ b/src/sql/src/statements/describe.rs @@ -14,11 +14,12 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser::ast::ObjectName; use sqlparser_derive::{Visit, VisitMut}; /// SQL structure for `DESCRIBE TABLE`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct DescribeTable { name: ObjectName, } diff --git a/src/sql/src/statements/drop.rs b/src/sql/src/statements/drop.rs index a46450db78f7..799722904dab 100644 --- a/src/sql/src/statements/drop.rs +++ b/src/sql/src/statements/drop.rs @@ -14,11 +14,12 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser::ast::ObjectName; use sqlparser_derive::{Visit, VisitMut}; /// DROP TABLE statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct DropTable { table_names: Vec, @@ -62,7 +63,7 @@ impl Display for DropTable { } /// DROP DATABASE statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct DropDatabase { name: ObjectName, /// drop table if exists @@ -99,7 +100,7 @@ impl Display for DropDatabase { } /// DROP FLOW statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct DropFlow { flow_name: ObjectName, /// drop flow if exists @@ -138,7 +139,7 @@ impl Display for DropFlow { } /// `DROP VIEW` statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct DropView { // The view name pub view_name: ObjectName, diff --git a/src/sql/src/statements/explain.rs b/src/sql/src/statements/explain.rs index 5b3a2671f939..96a12c7a41c6 100644 --- a/src/sql/src/statements/explain.rs +++ b/src/sql/src/statements/explain.rs @@ -14,13 +14,14 @@ use std::fmt::{Display, Formatter}; +use serde::Serialize; use sqlparser::ast::Statement as SpStatement; use sqlparser_derive::{Visit, VisitMut}; use crate::error::Error; /// Explain statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct Explain { pub inner: SpStatement, } diff --git a/src/sql/src/statements/insert.rs b/src/sql/src/statements/insert.rs index 4eae7f1e1874..f1c0b7144441 100644 --- a/src/sql/src/statements/insert.rs +++ b/src/sql/src/statements/insert.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use serde::Serialize; use sqlparser::ast::{ObjectName, Query, SetExpr, Statement, UnaryOperator, Values}; use sqlparser::parser::ParserError; use sqlparser_derive::{Visit, VisitMut}; @@ -20,7 +21,7 @@ use crate::ast::{Expr, Value}; use crate::error::Result; use crate::statements::query::Query as GtQuery; -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct Insert { // Can only be sqlparser::ast::Statement::Insert variant pub inner: Statement, diff --git a/src/sql/src/statements/option_map.rs b/src/sql/src/statements/option_map.rs index 9ff8d94312fd..d66cadf16461 100644 --- a/src/sql/src/statements/option_map.rs +++ b/src/sql/src/statements/option_map.rs @@ -16,14 +16,16 @@ use std::collections::{BTreeMap, HashMap}; use std::ops::ControlFlow; use common_base::secrets::{ExposeSecret, ExposeSecretMut, SecretString}; +use serde::Serialize; use sqlparser::ast::{Visit, VisitMut, Visitor, VisitorMut}; const REDACTED_OPTIONS: [&str; 2] = ["access_key_id", "secret_access_key"]; /// Options hashmap. -#[derive(Clone, Debug, Default)] +#[derive(Clone, Debug, Default, Serialize)] pub struct OptionMap { options: BTreeMap, + #[serde(skip_serializing)] secrets: BTreeMap, } diff --git a/src/sql/src/statements/query.rs b/src/sql/src/statements/query.rs index 3b571a1a0ba1..b5221a226356 100644 --- a/src/sql/src/statements/query.rs +++ b/src/sql/src/statements/query.rs @@ -14,13 +14,14 @@ use std::fmt; +use serde::Serialize; use sqlparser::ast::Query as SpQuery; use sqlparser_derive::{Visit, VisitMut}; use crate::error::Error; /// Query statement instance. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct Query { pub inner: SpQuery, } diff --git a/src/sql/src/statements/set_variables.rs b/src/sql/src/statements/set_variables.rs index 7a2a94a531df..748d077d84ce 100644 --- a/src/sql/src/statements/set_variables.rs +++ b/src/sql/src/statements/set_variables.rs @@ -14,11 +14,12 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser::ast::{Expr, ObjectName}; use sqlparser_derive::{Visit, VisitMut}; /// SET variables statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct SetVariables { pub variable: ObjectName, pub value: Vec, diff --git a/src/sql/src/statements/show.rs b/src/sql/src/statements/show.rs index f6a8dab72897..055cd7768f02 100644 --- a/src/sql/src/statements/show.rs +++ b/src/sql/src/statements/show.rs @@ -14,12 +14,13 @@ use std::fmt::{self, Display}; +use serde::Serialize; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::{Expr, Ident, ObjectName}; /// Show kind for SQL expressions like `SHOW DATABASE` or `SHOW TABLE` -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum ShowKind { All, Like(Ident), @@ -46,14 +47,14 @@ macro_rules! format_kind { } /// SQL structure for `SHOW DATABASES`. 
-#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowDatabases { pub kind: ShowKind, pub full: bool, } /// The SQL `SHOW COLUMNS` statement -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowColumns { pub kind: ShowKind, pub table: String, @@ -77,7 +78,7 @@ impl Display for ShowColumns { } /// The SQL `SHOW INDEX` statement -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowIndex { pub kind: ShowKind, pub table: String, @@ -118,7 +119,7 @@ impl Display for ShowDatabases { } /// SQL structure for `SHOW TABLES`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowTables { pub kind: ShowKind, pub database: Option, @@ -142,7 +143,7 @@ impl Display for ShowTables { } /// SQL structure for `SHOW TABLE STATUS`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowTableStatus { pub kind: ShowKind, pub database: Option, @@ -162,7 +163,7 @@ impl Display for ShowTableStatus { } /// SQL structure for `SHOW CREATE DATABASE`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowCreateDatabase { pub database_name: ObjectName, } @@ -175,7 +176,7 @@ impl Display for ShowCreateDatabase { } /// SQL structure for `SHOW CREATE TABLE`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowCreateTable { pub table_name: ObjectName, } @@ -188,7 +189,7 @@ impl Display for ShowCreateTable { } /// SQL structure for `SHOW CREATE FLOW`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowCreateFlow { pub flow_name: ObjectName, } @@ -201,7 +202,7 @@ impl Display for ShowCreateFlow { } /// SQL structure for `SHOW FLOWS`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowFlows { pub kind: ShowKind, pub database: Option, @@ -220,7 +221,7 @@ impl Display for ShowFlows { } /// SQL structure for `SHOW CREATE VIEW`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowCreateView { pub view_name: ObjectName, } @@ -233,7 +234,7 @@ impl Display for ShowCreateView { } /// SQL structure for `SHOW VIEWS`. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowViews { pub kind: ShowKind, pub database: Option, @@ -252,7 +253,7 @@ impl Display for ShowViews { } /// SQL structure for `SHOW VARIABLES xxx`. 
-#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowVariables { pub variable: ObjectName, } @@ -265,7 +266,7 @@ impl Display for ShowVariables { } /// SQL structure for "SHOW STATUS" -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowStatus {} impl Display for ShowStatus { diff --git a/src/sql/src/statements/statement.rs b/src/sql/src/statements/statement.rs index 8ad391a00dd2..2870f2b64a6a 100644 --- a/src/sql/src/statements/statement.rs +++ b/src/sql/src/statements/statement.rs @@ -15,12 +15,14 @@ use std::fmt::Display; use datafusion_sql::parser::Statement as DfStatement; +use serde::Serialize; use sqlparser::ast::Statement as SpStatement; use sqlparser_derive::{Visit, VisitMut}; use crate::error::{ConvertToDfStatementSnafu, Error}; use crate::statements::admin::Admin; use crate::statements::alter::{AlterDatabase, AlterTable}; +use crate::statements::copy::Copy; use crate::statements::create::{ CreateDatabase, CreateExternalTable, CreateFlow, CreateTable, CreateTableLike, CreateView, }; @@ -42,7 +44,7 @@ use crate::statements::truncate::TruncateTable; /// Tokens parsed by `DFParser` are converted into these values. #[allow(clippy::large_enum_variant)] -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum Statement { // Query Query(Box), @@ -107,7 +109,8 @@ pub enum Statement { // EXPLAIN QUERY Explain(Explain), // COPY - Copy(crate::statements::copy::Copy), + Copy(Copy), + // Telemetry Query Language Tql(Tql), // TRUNCATE TABLE TruncateTable(TruncateTable), diff --git a/src/sql/src/statements/tql.rs b/src/sql/src/statements/tql.rs index 0f7a85f95ab8..7980103431ef 100644 --- a/src/sql/src/statements/tql.rs +++ b/src/sql/src/statements/tql.rs @@ -14,9 +14,10 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser_derive::{Visit, VisitMut}; -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub enum Tql { Eval(TqlEval), Explain(TqlExplain), @@ -49,7 +50,7 @@ fn format_tql( } /// TQL EVAL (, , , [lookback]) -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct TqlEval { pub start: String, pub end: String, @@ -74,7 +75,7 @@ impl Display for TqlEval { /// TQL EXPLAIN [VERBOSE] [, , , [lookback]] /// doesn't execute the query but tells how the query would be executed (similar to SQL EXPLAIN). -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct TqlExplain { pub start: String, pub end: String, @@ -103,7 +104,7 @@ impl Display for TqlExplain { /// TQL ANALYZE [VERBOSE] (, , , [lookback]) /// executes the plan and tells the detailed per-step execution time (similar to SQL ANALYZE). 
-#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct TqlAnalyze { pub start: String, pub end: String, diff --git a/src/sql/src/statements/truncate.rs b/src/sql/src/statements/truncate.rs index c1a063f959ce..710b5f72df3c 100644 --- a/src/sql/src/statements/truncate.rs +++ b/src/sql/src/statements/truncate.rs @@ -14,11 +14,12 @@ use std::fmt::Display; +use serde::Serialize; use sqlparser::ast::ObjectName; use sqlparser_derive::{Visit, VisitMut}; /// TRUNCATE TABLE statement. -#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut)] +#[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct TruncateTable { table_name: ObjectName, } diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index 4da65f0b21f5..5a48fef39e43 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -361,6 +361,14 @@ pub async fn test_sql_api(store_type: StorageType) { let body = serde_json::from_str::(&res.text().await).unwrap(); assert_eq!(body.code(), ErrorCode::DatabaseNotFound as u32); + // test parse method + let res = client.get("/v1/sql/parse?sql=desc table t").send().await; + assert_eq!(res.status(), StatusCode::OK); + assert_eq!( + res.text().await, + "[{\"DescribeTable\":{\"name\":[{\"value\":\"t\",\"quote_style\":null}]}}]" + ); + // test timezone header let res = client .get("/v1/sql?&sql=show variables system_time_zone") From a8012147ab52f43513580f17ae210a2dbb439318 Mon Sep 17 00:00:00 2001 From: Niwaka <61189782+NiwakaDev@users.noreply.github.com> Date: Wed, 11 Dec 2024 22:46:23 +0900 Subject: [PATCH 11/59] feat: support push down IN filter (#5129) * feat: support push down IN filter * chore: move tests to prune.sql --- src/query/src/dist_plan/commutativity.rs | 2 +- .../standalone/common/select/prune.result | 26 +++++++++++++++++++ .../cases/standalone/common/select/prune.sql | 10 +++++++ 3 files changed, 37 insertions(+), 1 deletion(-) diff --git a/src/query/src/dist_plan/commutativity.rs b/src/query/src/dist_plan/commutativity.rs index 8166400b8fbd..45378e532c0c 100644 --- a/src/query/src/dist_plan/commutativity.rs +++ b/src/query/src/dist_plan/commutativity.rs @@ -146,6 +146,7 @@ impl Categorizer { | Expr::Between(_) | Expr::Sort(_) | Expr::Exists(_) + | Expr::InList(_) | Expr::ScalarFunction(_) => Commutativity::Commutative, Expr::Like(_) @@ -157,7 +158,6 @@ impl Categorizer { | Expr::TryCast(_) | Expr::AggregateFunction(_) | Expr::WindowFunction(_) - | Expr::InList(_) | Expr::InSubquery(_) | Expr::ScalarSubquery(_) | Expr::Wildcard { .. 
} => Commutativity::Unimplemented, diff --git a/tests/cases/standalone/common/select/prune.result b/tests/cases/standalone/common/select/prune.result index 13ddee5510d2..04282b6035a0 100644 --- a/tests/cases/standalone/common/select/prune.result +++ b/tests/cases/standalone/common/select/prune.result @@ -94,6 +94,32 @@ explain analyze select * from demo where idc='idc1'; |_|_| Total rows: 2_| +-+-+-+ +SELECT * FROM demo where host in ('test1'); + ++-------------------------+-------+-------+------+-----------+ +| ts | value | host | idc | collector | ++-------------------------+-------+-------+------+-----------+ +| 1970-01-01T00:00:00.001 | 2.0 | test1 | idc1 | disk | ++-------------------------+-------+-------+------+-----------+ + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze SELECT * FROM demo where host in ('test1'); + ++-+-+-+ +| stage | node | plan_| ++-+-+-+ +| 0_| 0_|_MergeScanExec: REDACTED +|_|_|_| +| 1_| 0_|_SeqScan: region=REDACTED, partition_count=1 (1 memtable ranges, 0 file 0 ranges) REDACTED +|_|_|_| +|_|_| Total rows: 1_| ++-+-+-+ + drop table demo; Affected Rows: 0 diff --git a/tests/cases/standalone/common/select/prune.sql b/tests/cases/standalone/common/select/prune.sql index e7fd643537a1..4b976cdb1c7d 100644 --- a/tests/cases/standalone/common/select/prune.sql +++ b/tests/cases/standalone/common/select/prune.sql @@ -27,4 +27,14 @@ select * from demo where collector='disk' order by ts; -- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED explain analyze select * from demo where idc='idc1'; +SELECT * FROM demo where host in ('test1'); + +-- SQLNESS REPLACE (metrics.*) REDACTED +-- SQLNESS REPLACE (RoundRobinBatch.*) REDACTED +-- SQLNESS REPLACE (-+) - +-- SQLNESS REPLACE (\s\s+) _ +-- SQLNESS REPLACE (peers.*) REDACTED +-- SQLNESS REPLACE region=\d+\(\d+,\s+\d+\) region=REDACTED +explain analyze SELECT * FROM demo where host in ('test1'); + drop table demo; From e2a41ccaec9976641dbaeeb4b1e6cec6f3d37783 Mon Sep 17 00:00:00 2001 From: Weny Xu Date: Thu, 12 Dec 2024 11:13:36 +0800 Subject: [PATCH 12/59] feat: add prefetch support to `PuffinFileFooterReader` for reduced I/O time (#5145) * feat: introduce `PuffinFileFooterReader` * refactor: remove `SyncReader` trait and impl * refactor: replace `FooterParser` with `PuffinFileFooterReader` * chore: remove unused errors --- src/index/src/inverted_index/error.rs | 11 +- src/puffin/src/error.rs | 52 +--- src/puffin/src/file_format/reader.rs | 14 +- src/puffin/src/file_format/reader/file.rs | 73 +---- src/puffin/src/file_format/reader/footer.rs | 323 +++++--------------- src/puffin/src/tests.rs | 180 ++--------- 6 files changed, 130 insertions(+), 523 deletions(-) diff --git a/src/index/src/inverted_index/error.rs b/src/index/src/inverted_index/error.rs index 07a42b8b8767..49816e63c463 100644 --- a/src/index/src/inverted_index/error.rs +++ b/src/index/src/inverted_index/error.rs @@ -26,14 +26,6 @@ use crate::inverted_index::search::predicate::Predicate; #[snafu(visibility(pub))] #[stack_trace_debug] pub enum Error { - #[snafu(display("Failed to seek"))] - Seek { - #[snafu(source)] - error: IoError, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Failed to read"))] Read { #[snafu(source)] @@ -215,8 +207,7 @@ impl ErrorExt for Error { fn status_code(&self) -> StatusCode { use Error::*; match 
self { - Seek { .. } - | Read { .. } + Read { .. } | Write { .. } | Flush { .. } | Close { .. } diff --git a/src/puffin/src/error.rs b/src/puffin/src/error.rs index 57aec44d1fb8..634ede5b1364 100644 --- a/src/puffin/src/error.rs +++ b/src/puffin/src/error.rs @@ -25,14 +25,6 @@ use snafu::{Location, Snafu}; #[snafu(visibility(pub))] #[stack_trace_debug] pub enum Error { - #[snafu(display("Failed to seek"))] - Seek { - #[snafu(source)] - error: IoError, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Failed to read"))] Read { #[snafu(source)] @@ -119,14 +111,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Failed to convert bytes to integer"))] - BytesToInteger { - #[snafu(source)] - error: std::array::TryFromSliceError, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Unsupported decompression: {}", decompression))] UnsupportedDecompression { decompression: String, @@ -150,17 +134,15 @@ pub enum Error { location: Location, }, - #[snafu(display("Parse stage not match, expected: {}, actual: {}", expected, actual))] - ParseStageNotMatch { - expected: String, - actual: String, + #[snafu(display("Unexpected footer payload size: {}", size))] + UnexpectedFooterPayloadSize { + size: i32, #[snafu(implicit)] location: Location, }, - #[snafu(display("Unexpected footer payload size: {}", size))] - UnexpectedFooterPayloadSize { - size: i32, + #[snafu(display("Invalid puffin footer"))] + InvalidPuffinFooter { #[snafu(implicit)] location: Location, }, @@ -177,20 +159,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Invalid blob offset: {}, location: {:?}", offset, location))] - InvalidBlobOffset { - offset: i64, - #[snafu(implicit)] - location: Location, - }, - - #[snafu(display("Invalid blob area end: {}, location: {:?}", offset, location))] - InvalidBlobAreaEnd { - offset: u64, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Failed to compress lz4"))] Lz4Compression { #[snafu(source)] @@ -262,8 +230,7 @@ impl ErrorExt for Error { fn status_code(&self) -> StatusCode { use Error::*; match self { - Seek { .. } - | Read { .. } + Read { .. } | MagicNotMatched { .. } | DeserializeJson { .. } | Write { .. } @@ -275,18 +242,15 @@ impl ErrorExt for Error { | Remove { .. } | Rename { .. } | SerializeJson { .. } - | BytesToInteger { .. } - | ParseStageNotMatch { .. } | UnexpectedFooterPayloadSize { .. } | UnexpectedPuffinFileSize { .. } - | InvalidBlobOffset { .. } - | InvalidBlobAreaEnd { .. } | Lz4Compression { .. } | Lz4Decompression { .. } | BlobNotFound { .. } | BlobIndexOutOfBound { .. } | FileKeyNotMatch { .. } - | WalkDir { .. } => StatusCode::Unexpected, + | WalkDir { .. } + | InvalidPuffinFooter { .. } => StatusCode::Unexpected, UnsupportedCompression { .. } | UnsupportedDecompression { .. } => { StatusCode::Unsupported diff --git a/src/puffin/src/file_format/reader.rs b/src/puffin/src/file_format/reader.rs index 3f48bf4b105e..162d7116a578 100644 --- a/src/puffin/src/file_format/reader.rs +++ b/src/puffin/src/file_format/reader.rs @@ -21,21 +21,9 @@ use common_base::range_read::RangeReader; use crate::blob_metadata::BlobMetadata; use crate::error::Result; pub use crate::file_format::reader::file::PuffinFileReader; +pub use crate::file_format::reader::footer::PuffinFileFooterReader; use crate::file_metadata::FileMetadata; -/// `SyncReader` defines a synchronous reader for puffin data. -pub trait SyncReader<'a> { - type Reader: std::io::Read + std::io::Seek; - - /// Fetches the FileMetadata. 
- fn metadata(&'a mut self) -> Result; - - /// Reads particular blob data based on given metadata. - /// - /// Data read from the reader is compressed leaving the caller to decompress the data. - fn blob_reader(&'a mut self, blob_metadata: &BlobMetadata) -> Result; -} - /// `AsyncReader` defines an asynchronous reader for puffin data. #[async_trait] pub trait AsyncReader<'a> { diff --git a/src/puffin/src/file_format/reader/file.rs b/src/puffin/src/file_format/reader/file.rs index 3736ed5d2d8d..31e8e10bc4d5 100644 --- a/src/puffin/src/file_format/reader/file.rs +++ b/src/puffin/src/file_format/reader/file.rs @@ -12,20 +12,15 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::io::{self, SeekFrom}; - use async_trait::async_trait; use common_base::range_read::RangeReader; use snafu::{ensure, ResultExt}; use crate::blob_metadata::BlobMetadata; -use crate::error::{ - MagicNotMatchedSnafu, ReadSnafu, Result, SeekSnafu, UnexpectedPuffinFileSizeSnafu, - UnsupportedDecompressionSnafu, -}; -use crate::file_format::reader::footer::FooterParser; -use crate::file_format::reader::{AsyncReader, SyncReader}; -use crate::file_format::{MAGIC, MAGIC_SIZE, MIN_FILE_SIZE}; +use crate::error::{ReadSnafu, Result, UnexpectedPuffinFileSizeSnafu}; +use crate::file_format::reader::footer::DEFAULT_PREFETCH_SIZE; +use crate::file_format::reader::{AsyncReader, PuffinFileFooterReader}; +use crate::file_format::MIN_FILE_SIZE; use crate::file_metadata::FileMetadata; use crate::partial_reader::PartialReader; @@ -72,45 +67,6 @@ impl PuffinFileReader { } } -impl<'a, R: io::Read + io::Seek + 'a> SyncReader<'a> for PuffinFileReader { - type Reader = PartialReader<&'a mut R>; - - fn metadata(&mut self) -> Result { - if let Some(metadata) = &self.metadata { - return Ok(metadata.clone()); - } - - // check the magic - let mut magic = [0; MAGIC_SIZE as usize]; - self.source.read_exact(&mut magic).context(ReadSnafu)?; - ensure!(magic == MAGIC, MagicNotMatchedSnafu); - - let file_size = self.get_file_size_sync()?; - - // parse the footer - let metadata = FooterParser::new(&mut self.source, file_size).parse_sync()?; - self.metadata = Some(metadata.clone()); - Ok(metadata) - } - - fn blob_reader(&'a mut self, blob_metadata: &BlobMetadata) -> Result { - // TODO(zhongzc): support decompression - let compression = blob_metadata.compression_codec.as_ref(); - ensure!( - compression.is_none(), - UnsupportedDecompressionSnafu { - decompression: compression.unwrap().to_string() - } - ); - - Ok(PartialReader::new( - &mut self.source, - blob_metadata.offset as _, - blob_metadata.length as _, - )) - } -} - #[async_trait] impl<'a, R: RangeReader + 'a> AsyncReader<'a> for PuffinFileReader { type Reader = PartialReader<&'a mut R>; @@ -119,17 +75,10 @@ impl<'a, R: RangeReader + 'a> AsyncReader<'a> for PuffinFileReader { if let Some(metadata) = &self.metadata { return Ok(metadata.clone()); } - - // check the magic - let magic = self.source.read(0..MAGIC_SIZE).await.context(ReadSnafu)?; - ensure!(*magic == MAGIC, MagicNotMatchedSnafu); - let file_size = self.get_file_size_async().await?; - - // parse the footer - let metadata = FooterParser::new(&mut self.source, file_size) - .parse_async() - .await?; + let mut reader = PuffinFileFooterReader::new(&mut self.source, file_size) + .with_prefetch_size(DEFAULT_PREFETCH_SIZE); + let metadata = reader.metadata().await?; self.metadata = Some(metadata.clone()); Ok(metadata) } @@ -143,14 +92,6 @@ impl<'a, R: RangeReader + 'a> AsyncReader<'a> for 
PuffinFileReader { } } -impl PuffinFileReader { - fn get_file_size_sync(&mut self) -> Result { - let file_size = self.source.seek(SeekFrom::End(0)).context(SeekSnafu)?; - Self::validate_file_size(file_size)?; - Ok(file_size) - } -} - impl PuffinFileReader { async fn get_file_size_async(&mut self) -> Result { let file_size = self diff --git a/src/puffin/src/file_format/reader/footer.rs b/src/puffin/src/file_format/reader/footer.rs index aa764fd32a21..d0cd1e8ed4f0 100644 --- a/src/puffin/src/file_format/reader/footer.rs +++ b/src/puffin/src/file_format/reader/footer.rs @@ -12,240 +12,98 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::io::{self, Cursor, SeekFrom}; +use std::io::Cursor; use common_base::range_read::RangeReader; use snafu::{ensure, ResultExt}; use crate::error::{ - BytesToIntegerSnafu, DeserializeJsonSnafu, InvalidBlobAreaEndSnafu, InvalidBlobOffsetSnafu, - Lz4DecompressionSnafu, MagicNotMatchedSnafu, ParseStageNotMatchSnafu, ReadSnafu, Result, - SeekSnafu, UnexpectedFooterPayloadSizeSnafu, + DeserializeJsonSnafu, InvalidPuffinFooterSnafu, Lz4DecompressionSnafu, MagicNotMatchedSnafu, + ReadSnafu, Result, UnexpectedFooterPayloadSizeSnafu, }; use crate::file_format::{Flags, FLAGS_SIZE, MAGIC, MAGIC_SIZE, MIN_FILE_SIZE, PAYLOAD_SIZE_SIZE}; use crate::file_metadata::FileMetadata; -/// Parser for the footer of a Puffin data file +/// The default prefetch size for the footer reader. +pub const DEFAULT_PREFETCH_SIZE: u64 = 1024; // 1KiB + +/// Reader for the footer of a Puffin data file /// /// The footer has a specific layout that needs to be read and parsed to /// extract metadata about the file, which is encapsulated in the [`FileMetadata`] type. /// +/// This reader supports prefetching, allowing for more efficient reading +/// of the footer by fetching additional data ahead of time. +/// /// ```text /// Footer layout: HeadMagic Payload PayloadSize Flags FootMagic /// [4] [?] [4] [4] [4] /// ``` -pub struct FooterParser { - // The underlying IO source +pub struct PuffinFileFooterReader { + /// The source of the puffin file source: R, - - // The size of the file, used for calculating offsets to read from - file_size: u64, -} - -impl FooterParser { - pub fn new(source: R, file_size: u64) -> Self { - Self { source, file_size } - } -} - -impl FooterParser { - /// Parses the footer from the IO source in a synchronous manner. - pub fn parse_sync(&mut self) -> Result { - let mut parser = StageParser::new(self.file_size); - - let mut buf = vec![]; - while let Some(byte_to_read) = parser.next_to_read() { - self.source - .seek(SeekFrom::Start(byte_to_read.offset)) - .context(SeekSnafu)?; - let size = byte_to_read.size as usize; - - buf.resize(size, 0); - let buf = &mut buf[..size]; - - self.source.read_exact(buf).context(ReadSnafu)?; - - parser.consume_bytes(buf)?; - } - - parser.finish() - } -} - -impl FooterParser { - /// Parses the footer from the IO source in a asynchronous manner. - pub async fn parse_async(&mut self) -> Result { - let mut parser = StageParser::new(self.file_size); - - let mut buf = vec![]; - while let Some(byte_to_read) = parser.next_to_read() { - buf.clear(); - let range = byte_to_read.offset..byte_to_read.offset + byte_to_read.size; - self.source - .read_into(range, &mut buf) - .await - .context(ReadSnafu)?; - parser.consume_bytes(&buf)?; - } - - parser.finish() - } -} - -/// The internal stages of parsing the footer. 
-/// This enum allows the StageParser to keep track of which part -/// of the footer needs to be parsed next. -#[derive(Debug, Clone, Copy, PartialEq, Eq)] -enum ParseStage { - FootMagic, - Flags, - PayloadSize, - Payload, - HeadMagic, - Done, -} - -/// Manages the parsing process of the file's footer. -struct StageParser { - /// Current stage in the parsing sequence of the footer. - stage: ParseStage, - - /// Total file size; used for calculating offsets to read from. + /// The content length of the puffin file file_size: u64, - - /// Flags from the footer, set when the `Flags` field is parsed. - flags: Flags, - - /// Size of the footer's payload, set when the `PayloadSize` is parsed. - payload_size: u64, - - /// Metadata from the footer's payload, set when the `Payload` is parsed. - metadata: Option, -} - -/// Represents a read operation that needs to be performed, including the -/// offset from the start of the file and the number of bytes to read. -struct BytesToRead { - offset: u64, - size: u64, + /// The prefetch footer size + prefetch_size: Option, } -impl StageParser { - fn new(file_size: u64) -> Self { +impl<'a, R: RangeReader + 'a> PuffinFileFooterReader { + pub fn new(source: R, content_len: u64) -> Self { Self { - stage: ParseStage::FootMagic, - file_size, - payload_size: 0, - flags: Flags::empty(), - metadata: None, + source, + file_size: content_len, + prefetch_size: None, } } - /// Determines the next segment of bytes to read based on the current parsing stage. - /// This method returns information like the offset and size of the next read, - /// or None if parsing is complete. - fn next_to_read(&self) -> Option { - if self.stage == ParseStage::Done { - return None; - } - - let btr = match self.stage { - ParseStage::FootMagic => BytesToRead { - offset: self.foot_magic_offset(), - size: MAGIC_SIZE, - }, - ParseStage::Flags => BytesToRead { - offset: self.flags_offset(), - size: FLAGS_SIZE, - }, - ParseStage::PayloadSize => BytesToRead { - offset: self.payload_size_offset(), - size: PAYLOAD_SIZE_SIZE, - }, - ParseStage::Payload => BytesToRead { - offset: self.payload_offset(), - size: self.payload_size, - }, - ParseStage::HeadMagic => BytesToRead { - offset: self.head_magic_offset(), - size: MAGIC_SIZE, - }, - ParseStage::Done => unreachable!(), - }; - - Some(btr) + fn prefetch_size(&self) -> u64 { + self.prefetch_size.unwrap_or(MIN_FILE_SIZE) } - /// Processes the bytes that have been read according to the current parsing stage - /// and advances the parsing stage. It ensures the correct sequence of bytes is - /// encountered and stores the necessary information in the `StageParser`. 
- fn consume_bytes(&mut self, bytes: &[u8]) -> Result<()> { - match self.stage { - ParseStage::FootMagic => { - ensure!(bytes == MAGIC, MagicNotMatchedSnafu); - self.stage = ParseStage::Flags; - } - ParseStage::Flags => { - self.flags = Self::parse_flags(bytes)?; - self.stage = ParseStage::PayloadSize; - } - ParseStage::PayloadSize => { - self.payload_size = Self::parse_payload_size(bytes)?; - self.validate_payload_size()?; - self.stage = ParseStage::Payload; - } - ParseStage::Payload => { - self.metadata = Some(self.parse_payload(bytes)?); - self.validate_metadata()?; - self.stage = ParseStage::HeadMagic; - } - ParseStage::HeadMagic => { - ensure!(bytes == MAGIC, MagicNotMatchedSnafu); - self.stage = ParseStage::Done; - } - ParseStage::Done => unreachable!(), - } - - Ok(()) + pub fn with_prefetch_size(mut self, prefetch_size: u64) -> Self { + self.prefetch_size = Some(prefetch_size.max(MIN_FILE_SIZE)); + self } - /// Finalizes the parsing process, ensuring all stages are complete, and returns - /// the parsed `FileMetadata`. It converts the raw footer payload into structured data. - fn finish(self) -> Result { - ensure!( - self.stage == ParseStage::Done, - ParseStageNotMatchSnafu { - expected: format!("{:?}", ParseStage::Done), - actual: format!("{:?}", self.stage), - } - ); + pub async fn metadata(&'a mut self) -> Result { + // Note: prefetch > content_len is allowed, since we're using saturating_sub. + let footer_start = self.file_size.saturating_sub(self.prefetch_size()); + let suffix = self + .source + .read(footer_start..self.file_size) + .await + .context(ReadSnafu)?; + let suffix_len = suffix.len(); - Ok(self.metadata.unwrap()) - } + // check the magic + let magic = Self::read_tailing_four_bytes(&suffix)?; + ensure!(magic == MAGIC, MagicNotMatchedSnafu); - fn parse_flags(bytes: &[u8]) -> Result { - let n = u32::from_le_bytes(bytes.try_into().context(BytesToIntegerSnafu)?); - Ok(Flags::from_bits_truncate(n)) - } - - fn parse_payload_size(bytes: &[u8]) -> Result { - let n = i32::from_le_bytes(bytes.try_into().context(BytesToIntegerSnafu)?); - ensure!(n >= 0, UnexpectedFooterPayloadSizeSnafu { size: n }); - Ok(n as u64) - } + let flags = self.decode_flags(&suffix[..suffix_len - MAGIC_SIZE as usize])?; + let length = self.decode_payload_size( + &suffix[..suffix_len - MAGIC_SIZE as usize - FLAGS_SIZE as usize], + )?; + let footer_size = PAYLOAD_SIZE_SIZE + FLAGS_SIZE + MAGIC_SIZE; - fn validate_payload_size(&self) -> Result<()> { - ensure!( - self.payload_size <= self.file_size - MIN_FILE_SIZE, - UnexpectedFooterPayloadSizeSnafu { - size: self.payload_size as i32 - } - ); - Ok(()) + // Did not fetch the entire file metadata in the initial read, need to make a second request. 
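// (Here `suffix` holds the last `prefetch_size()` bytes of the file, and its trailing
// `footer_size` bytes are the PayloadSize, Flags and FootMagic fields. If the payload is
// larger than the remaining `suffix_len - footer_size` bytes, one extra ranged read fetches
// exactly the payload; otherwise the payload is sliced directly out of `suffix`, whose
// offsets are relative to `footer_start`.)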
+ if length > suffix_len as u64 - footer_size { + let metadata_start = self.file_size - length - footer_size; + let meta = self + .source + .read(metadata_start..self.file_size - footer_size) + .await + .context(ReadSnafu)?; + self.parse_payload(&flags, &meta) + } else { + let metadata_start = self.file_size - length - footer_size - footer_start; + let meta = &suffix[metadata_start as usize..suffix_len - footer_size as usize]; + self.parse_payload(&flags, meta) + } } - fn parse_payload(&self, bytes: &[u8]) -> Result { - if self.flags.contains(Flags::FOOTER_PAYLOAD_COMPRESSED_LZ4) { + fn parse_payload(&self, flags: &Flags, bytes: &[u8]) -> Result { + if flags.contains(Flags::FOOTER_PAYLOAD_COMPRESSED_LZ4) { let decoder = lz4_flex::frame::FrameDecoder::new(Cursor::new(bytes)); let res = serde_json::from_reader(decoder).context(Lz4DecompressionSnafu)?; Ok(res) @@ -254,54 +112,35 @@ impl StageParser { } } - fn validate_metadata(&self) -> Result<()> { - let metadata = self.metadata.as_ref().expect("metadata is not set"); - - let mut next_blob_offset = MAGIC_SIZE; - // check blob offsets - for blob in &metadata.blobs { - ensure!( - blob.offset as u64 == next_blob_offset, - InvalidBlobOffsetSnafu { - offset: blob.offset - } - ); - next_blob_offset += blob.length as u64; - } - - let blob_area_end = metadata - .blobs - .last() - .map_or(MAGIC_SIZE, |b| (b.offset + b.length) as u64); - ensure!( - blob_area_end == self.head_magic_offset(), - InvalidBlobAreaEndSnafu { - offset: blob_area_end - } - ); - - Ok(()) - } + fn read_tailing_four_bytes(suffix: &[u8]) -> Result<[u8; 4]> { + let suffix_len = suffix.len(); + ensure!(suffix_len >= 4, InvalidPuffinFooterSnafu); + let mut bytes = [0; 4]; + bytes.copy_from_slice(&suffix[suffix_len - 4..suffix_len]); - fn foot_magic_offset(&self) -> u64 { - self.file_size - MAGIC_SIZE + Ok(bytes) } - fn flags_offset(&self) -> u64 { - self.file_size - MAGIC_SIZE - FLAGS_SIZE + fn decode_flags(&self, suffix: &[u8]) -> Result { + let flags = u32::from_le_bytes(Self::read_tailing_four_bytes(suffix)?); + Ok(Flags::from_bits_truncate(flags)) } - fn payload_size_offset(&self) -> u64 { - self.file_size - MAGIC_SIZE - FLAGS_SIZE - PAYLOAD_SIZE_SIZE - } + fn decode_payload_size(&self, suffix: &[u8]) -> Result { + let payload_size = i32::from_le_bytes(Self::read_tailing_four_bytes(suffix)?); - fn payload_offset(&self) -> u64 { - // `validate_payload_size` ensures that this subtraction will not overflow - self.file_size - MAGIC_SIZE - FLAGS_SIZE - PAYLOAD_SIZE_SIZE - self.payload_size - } + ensure!( + payload_size >= 0, + UnexpectedFooterPayloadSizeSnafu { size: payload_size } + ); + let payload_size = payload_size as u64; + ensure!( + payload_size <= self.file_size - MIN_FILE_SIZE, + UnexpectedFooterPayloadSizeSnafu { + size: self.file_size as i32 + } + ); - fn head_magic_offset(&self) -> u64 { - // `validate_payload_size` ensures that this subtraction will not overflow - self.file_size - MAGIC_SIZE * 2 - FLAGS_SIZE - PAYLOAD_SIZE_SIZE - self.payload_size + Ok(payload_size) } } diff --git a/src/puffin/src/tests.rs b/src/puffin/src/tests.rs index a152d4124bd6..a3bb48587924 100644 --- a/src/puffin/src/tests.rs +++ b/src/puffin/src/tests.rs @@ -13,26 +13,14 @@ // limitations under the License. 
use std::collections::HashMap; -use std::fs::File; -use std::io::{Cursor, Read}; use std::vec; use common_base::range_read::{FileReader, RangeReader}; use futures::io::Cursor as AsyncCursor; -use crate::file_format::reader::{AsyncReader, PuffinFileReader, SyncReader}; -use crate::file_format::writer::{AsyncWriter, Blob, PuffinFileWriter, SyncWriter}; - -#[test] -fn test_read_empty_puffin_sync() { - let path = "src/tests/resources/empty-puffin-uncompressed.puffin"; - - let file = File::open(path).unwrap(); - let mut reader = PuffinFileReader::new(file); - let metadata = reader.metadata().unwrap(); - assert_eq!(metadata.properties.len(), 0); - assert_eq!(metadata.blobs.len(), 0); -} +use crate::file_format::reader::{AsyncReader, PuffinFileFooterReader, PuffinFileReader}; +use crate::file_format::writer::{AsyncWriter, Blob, PuffinFileWriter}; +use crate::file_metadata::FileMetadata; #[tokio::test] async fn test_read_empty_puffin_async() { @@ -45,39 +33,37 @@ async fn test_read_empty_puffin_async() { assert_eq!(metadata.blobs.len(), 0); } -#[test] -fn test_sample_metric_data_puffin_sync() { - let path = "src/tests/resources/sample-metric-data-uncompressed.puffin"; - - let file = File::open(path).unwrap(); - let mut reader = PuffinFileReader::new(file); - let metadata = reader.metadata().unwrap(); - - assert_eq!(metadata.properties.len(), 1); - assert_eq!( - metadata.properties.get("created-by"), - Some(&"Test 1234".to_string()) - ); - - assert_eq!(metadata.blobs.len(), 2); - assert_eq!(metadata.blobs[0].blob_type, "some-blob"); - assert_eq!(metadata.blobs[0].offset, 4); - assert_eq!(metadata.blobs[0].length, 9); - - assert_eq!(metadata.blobs[1].blob_type, "some-other-blob"); - assert_eq!(metadata.blobs[1].offset, 13); - assert_eq!(metadata.blobs[1].length, 83); +async fn test_read_puffin_file_metadata( + path: &str, + file_size: u64, + expected_metadata: FileMetadata, +) { + for prefetch_size in [0, file_size / 2, file_size, file_size + 10] { + let reader = FileReader::new(path).await.unwrap(); + let mut footer_reader = PuffinFileFooterReader::new(reader, file_size); + if prefetch_size > 0 { + footer_reader = footer_reader.with_prefetch_size(prefetch_size); + } + let metadata = footer_reader.metadata().await.unwrap(); + assert_eq!(metadata.properties, expected_metadata.properties); + assert_eq!(metadata.blobs, expected_metadata.blobs); + } +} - let mut some_blob = reader.blob_reader(&metadata.blobs[0]).unwrap(); - let mut buf = String::new(); - some_blob.read_to_string(&mut buf).unwrap(); - assert_eq!(buf, "abcdefghi"); +#[tokio::test] +async fn test_read_puffin_file_metadata_async() { + let paths = vec![ + "src/tests/resources/empty-puffin-uncompressed.puffin", + "src/tests/resources/sample-metric-data-uncompressed.puffin", + ]; + for path in paths { + let mut reader = FileReader::new(path).await.unwrap(); + let file_size = reader.metadata().await.unwrap().content_length; + let mut reader = PuffinFileReader::new(reader); + let metadata = reader.metadata().await.unwrap(); - let mut some_other_blob = reader.blob_reader(&metadata.blobs[1]).unwrap(); - let mut buf = Vec::new(); - some_other_blob.read_to_end(&mut buf).unwrap(); - let expected = include_bytes!("tests/resources/sample-metric-data.blob"); - assert_eq!(buf, expected); + test_read_puffin_file_metadata(path, file_size, metadata).await; + } } #[tokio::test] @@ -113,38 +99,6 @@ async fn test_sample_metric_data_puffin_async() { assert_eq!(buf, expected); } -#[test] -fn test_writer_reader_with_empty_sync() { - fn
test_writer_reader_with_empty_sync(footer_compressed: bool) { - let mut buf = Cursor::new(vec![]); - - let mut writer = PuffinFileWriter::new(&mut buf); - writer.set_properties(HashMap::from([( - "created-by".to_string(), - "Test 1234".to_string(), - )])); - - writer.set_footer_lz4_compressed(footer_compressed); - let written_bytes = writer.finish().unwrap(); - assert!(written_bytes > 0); - - let mut buf = Cursor::new(buf.into_inner()); - let mut reader = PuffinFileReader::new(&mut buf); - let metadata = reader.metadata().unwrap(); - - assert_eq!(metadata.properties.len(), 1); - assert_eq!( - metadata.properties.get("created-by"), - Some(&"Test 1234".to_string()) - ); - - assert_eq!(metadata.blobs.len(), 0); - } - - test_writer_reader_with_empty_sync(false); - test_writer_reader_with_empty_sync(true); -} - #[tokio::test] async fn test_writer_reader_empty_async() { async fn test_writer_reader_empty_async(footer_compressed: bool) { @@ -176,76 +130,6 @@ async fn test_writer_reader_empty_async() { test_writer_reader_empty_async(true).await; } -#[test] -fn test_writer_reader_sync() { - fn test_writer_reader_sync(footer_compressed: bool) { - let mut buf = Cursor::new(vec![]); - - let mut writer = PuffinFileWriter::new(&mut buf); - - let blob1 = "abcdefghi"; - writer - .add_blob(Blob { - compressed_data: Cursor::new(&blob1), - blob_type: "some-blob".to_string(), - properties: Default::default(), - compression_codec: None, - }) - .unwrap(); - - let blob2 = include_bytes!("tests/resources/sample-metric-data.blob"); - writer - .add_blob(Blob { - compressed_data: Cursor::new(&blob2), - blob_type: "some-other-blob".to_string(), - properties: Default::default(), - compression_codec: None, - }) - .unwrap(); - - writer.set_properties(HashMap::from([( - "created-by".to_string(), - "Test 1234".to_string(), - )])); - - writer.set_footer_lz4_compressed(footer_compressed); - let written_bytes = writer.finish().unwrap(); - assert!(written_bytes > 0); - - let mut buf = Cursor::new(buf.into_inner()); - let mut reader = PuffinFileReader::new(&mut buf); - let metadata = reader.metadata().unwrap(); - - assert_eq!(metadata.properties.len(), 1); - assert_eq!( - metadata.properties.get("created-by"), - Some(&"Test 1234".to_string()) - ); - - assert_eq!(metadata.blobs.len(), 2); - assert_eq!(metadata.blobs[0].blob_type, "some-blob"); - assert_eq!(metadata.blobs[0].offset, 4); - assert_eq!(metadata.blobs[0].length, 9); - - assert_eq!(metadata.blobs[1].blob_type, "some-other-blob"); - assert_eq!(metadata.blobs[1].offset, 13); - assert_eq!(metadata.blobs[1].length, 83); - - let mut some_blob = reader.blob_reader(&metadata.blobs[0]).unwrap(); - let mut buf = String::new(); - some_blob.read_to_string(&mut buf).unwrap(); - assert_eq!(buf, blob1); - - let mut some_other_blob = reader.blob_reader(&metadata.blobs[1]).unwrap(); - let mut buf = Vec::new(); - some_other_blob.read_to_end(&mut buf).unwrap(); - assert_eq!(buf, blob2); - } - - test_writer_reader_sync(false); - test_writer_reader_sync(true); -} - #[tokio::test] async fn test_writer_reader_async() { async fn test_writer_reader_async(footer_compressed: bool) { From 8c1959c580fdb3c5ecafdb6bc4fb6395a80ebedf Mon Sep 17 00:00:00 2001 From: Weny Xu Date: Thu, 12 Dec 2024 11:49:54 +0800 Subject: [PATCH 13/59] feat: add prefetch support to `InvertedIndexFooterReader` for reduced I/O time (#5146) * feat: add prefetch support to `InvertedIndeFooterReader` * chore: correct struct name * chore: apply suggestions from CR --- src/index/src/inverted_index/error.rs | 16 ++- 
.../src/inverted_index/format/reader/blob.rs | 6 +- .../inverted_index/format/reader/footer.rs | 135 ++++++++++++------ src/index/src/lib.rs | 1 + 4 files changed, 114 insertions(+), 44 deletions(-) diff --git a/src/index/src/inverted_index/error.rs b/src/index/src/inverted_index/error.rs index 49816e63c463..7e861beda6d1 100644 --- a/src/index/src/inverted_index/error.rs +++ b/src/index/src/inverted_index/error.rs @@ -68,6 +68,18 @@ pub enum Error { location: Location, }, + #[snafu(display("Blob size too small"))] + BlobSizeTooSmall { + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Invalid footer payload size"))] + InvalidFooterPayloadSize { + #[snafu(implicit)] + location: Location, + }, + #[snafu(display("Unexpected inverted index footer payload size, max: {max_payload_size}, actual: {actual_payload_size}"))] UnexpectedFooterPayloadSize { max_payload_size: u64, @@ -220,7 +232,9 @@ impl ErrorExt for Error { | KeysApplierUnexpectedPredicates { .. } | CommonIo { .. } | UnknownIntermediateCodecMagic { .. } - | FstCompile { .. } => StatusCode::Unexpected, + | FstCompile { .. } + | InvalidFooterPayloadSize { .. } + | BlobSizeTooSmall { .. } => StatusCode::Unexpected, ParseRegex { .. } | ParseDFA { .. } diff --git a/src/index/src/inverted_index/format/reader/blob.rs b/src/index/src/inverted_index/format/reader/blob.rs index ace0e5c48536..de34cd36f849 100644 --- a/src/index/src/inverted_index/format/reader/blob.rs +++ b/src/index/src/inverted_index/format/reader/blob.rs @@ -19,8 +19,9 @@ use common_base::range_read::RangeReader; use greptime_proto::v1::index::InvertedIndexMetas; use snafu::{ensure, ResultExt}; +use super::footer::DEFAULT_PREFETCH_SIZE; use crate::inverted_index::error::{CommonIoSnafu, Result, UnexpectedBlobSizeSnafu}; -use crate::inverted_index::format::reader::footer::InvertedIndeFooterReader; +use crate::inverted_index::format::reader::footer::InvertedIndexFooterReader; use crate::inverted_index::format::reader::InvertedIndexReader; use crate::inverted_index::format::MIN_BLOB_SIZE; @@ -72,7 +73,8 @@ impl InvertedIndexReader for InvertedIndexBlobReader { let blob_size = metadata.content_length; Self::validate_blob_size(blob_size)?; - let mut footer_reader = InvertedIndeFooterReader::new(&mut self.source, blob_size); + let mut footer_reader = InvertedIndexFooterReader::new(&mut self.source, blob_size) + .with_prefetch_size(DEFAULT_PREFETCH_SIZE); footer_reader.metadata().await.map(Arc::new) } } diff --git a/src/index/src/inverted_index/format/reader/footer.rs b/src/index/src/inverted_index/format/reader/footer.rs index 1f35237711ce..c025ecf52ecd 100644 --- a/src/index/src/inverted_index/format/reader/footer.rs +++ b/src/index/src/inverted_index/format/reader/footer.rs @@ -18,53 +18,88 @@ use prost::Message; use snafu::{ensure, ResultExt}; use crate::inverted_index::error::{ - CommonIoSnafu, DecodeProtoSnafu, Result, UnexpectedFooterPayloadSizeSnafu, - UnexpectedOffsetSizeSnafu, UnexpectedZeroSegmentRowCountSnafu, + BlobSizeTooSmallSnafu, CommonIoSnafu, DecodeProtoSnafu, InvalidFooterPayloadSizeSnafu, Result, + UnexpectedFooterPayloadSizeSnafu, UnexpectedOffsetSizeSnafu, + UnexpectedZeroSegmentRowCountSnafu, }; use crate::inverted_index::format::FOOTER_PAYLOAD_SIZE_SIZE; -/// InvertedIndeFooterReader is for reading the footer section of the blob. -pub struct InvertedIndeFooterReader { +pub const DEFAULT_PREFETCH_SIZE: u64 = 1024; // 1KiB + +/// InvertedIndexFooterReader is for reading the footer section of the blob. 
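///
/// Footer layout at the end of the blob: a protobuf-encoded `InvertedIndexMetas` payload
/// followed by its size as a little-endian `u32` (`FOOTER_PAYLOAD_SIZE_SIZE` bytes), so the
/// reader needs at most two ranged reads, and only one when the prefetched suffix already
/// covers the payload.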
+pub struct InvertedIndexFooterReader { source: R, blob_size: u64, + prefetch_size: Option, } -impl InvertedIndeFooterReader { +impl InvertedIndexFooterReader { pub fn new(source: R, blob_size: u64) -> Self { - Self { source, blob_size } + Self { + source, + blob_size, + prefetch_size: None, + } + } + + /// Set the prefetch size for the footer reader. + pub fn with_prefetch_size(mut self, prefetch_size: u64) -> Self { + self.prefetch_size = Some(prefetch_size.max(FOOTER_PAYLOAD_SIZE_SIZE)); + self + } + + pub fn prefetch_size(&self) -> u64 { + self.prefetch_size.unwrap_or(FOOTER_PAYLOAD_SIZE_SIZE) } } -impl InvertedIndeFooterReader { +impl InvertedIndexFooterReader { pub async fn metadata(&mut self) -> Result { - let payload_size = self.read_payload_size().await?; - let metas = self.read_payload(payload_size).await?; - Ok(metas) - } + ensure!( + self.blob_size >= FOOTER_PAYLOAD_SIZE_SIZE, + BlobSizeTooSmallSnafu + ); - async fn read_payload_size(&mut self) -> Result { - let mut size_buf = [0u8; FOOTER_PAYLOAD_SIZE_SIZE as usize]; - let end = self.blob_size; - let start = end - FOOTER_PAYLOAD_SIZE_SIZE; - self.source - .read_into(start..end, &mut &mut size_buf[..]) + let footer_start = self.blob_size.saturating_sub(self.prefetch_size()); + let suffix = self + .source + .read(footer_start..self.blob_size) .await .context(CommonIoSnafu)?; + let suffix_len = suffix.len(); + let length = u32::from_le_bytes(Self::read_tailing_four_bytes(&suffix)?) as u64; + self.validate_payload_size(length)?; + + let footer_size = FOOTER_PAYLOAD_SIZE_SIZE; + + // Did not fetch the entire file metadata in the initial read, need to make a second request. + if length > suffix_len as u64 - footer_size { + let metadata_start = self.blob_size - length - footer_size; + let meta = self + .source + .read(metadata_start..self.blob_size - footer_size) + .await + .context(CommonIoSnafu)?; + self.parse_payload(&meta, length) + } else { + let metadata_start = self.blob_size - length - footer_size - footer_start; + let meta = &suffix[metadata_start as usize..suffix_len - footer_size as usize]; + self.parse_payload(meta, length) + } + } - let payload_size = u32::from_le_bytes(size_buf) as u64; - self.validate_payload_size(payload_size)?; + fn read_tailing_four_bytes(suffix: &[u8]) -> Result<[u8; 4]> { + let suffix_len = suffix.len(); + ensure!(suffix_len >= 4, InvalidFooterPayloadSizeSnafu); + let mut bytes = [0; 4]; + bytes.copy_from_slice(&suffix[suffix_len - 4..suffix_len]); - Ok(payload_size) + Ok(bytes) } - async fn read_payload(&mut self, payload_size: u64) -> Result { - let end = self.blob_size - FOOTER_PAYLOAD_SIZE_SIZE; - let start = end - payload_size; - let bytes = self.source.read(start..end).await.context(CommonIoSnafu)?; - - let metas = InvertedIndexMetas::decode(&*bytes).context(DecodeProtoSnafu)?; + fn parse_payload(&mut self, bytes: &[u8], payload_size: u64) -> Result { + let metas = InvertedIndexMetas::decode(bytes).context(DecodeProtoSnafu)?; self.validate_metas(&metas, payload_size)?; - Ok(metas) } @@ -113,9 +148,12 @@ impl InvertedIndeFooterReader { #[cfg(test)] mod tests { + use std::assert_matches::assert_matches; + use prost::Message; use super::*; + use crate::inverted_index::error::Error; fn create_test_payload(meta: InvertedIndexMeta) -> Vec { let mut metas = InvertedIndexMetas { @@ -141,14 +179,18 @@ mod tests { let mut payload_buf = create_test_payload(meta); let blob_size = payload_buf.len() as u64; - let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size); - let payload_size = 
reader.read_payload_size().await.unwrap(); - let metas = reader.read_payload(payload_size).await.unwrap(); + for prefetch in [0, blob_size / 2, blob_size, blob_size + 10] { + let mut reader = InvertedIndexFooterReader::new(&mut payload_buf, blob_size); + if prefetch > 0 { + reader = reader.with_prefetch_size(prefetch); + } - assert_eq!(metas.metas.len(), 1); - let index_meta = &metas.metas.get("test").unwrap(); - assert_eq!(index_meta.name, "test"); + let metas = reader.metadata().await.unwrap(); + assert_eq!(metas.metas.len(), 1); + let index_meta = &metas.metas.get("test").unwrap(); + assert_eq!(index_meta.name, "test"); + } } #[tokio::test] @@ -157,14 +199,20 @@ mod tests { name: "test".to_string(), ..Default::default() }; - let mut payload_buf = create_test_payload(meta); payload_buf.push(0xff); // Add an extra byte to corrupt the footer let blob_size = payload_buf.len() as u64; - let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size); - let payload_size_result = reader.read_payload_size().await; - assert!(payload_size_result.is_err()); + for prefetch in [0, blob_size / 2, blob_size, blob_size + 10] { + let blob_size = payload_buf.len() as u64; + let mut reader = InvertedIndexFooterReader::new(&mut payload_buf, blob_size); + if prefetch > 0 { + reader = reader.with_prefetch_size(prefetch); + } + + let result = reader.metadata().await; + assert_matches!(result, Err(Error::UnexpectedFooterPayloadSize { .. })); + } } #[tokio::test] @@ -178,10 +226,15 @@ mod tests { let mut payload_buf = create_test_payload(meta); let blob_size = payload_buf.len() as u64; - let mut reader = InvertedIndeFooterReader::new(&mut payload_buf, blob_size); - let payload_size = reader.read_payload_size().await.unwrap(); - let payload_result = reader.read_payload(payload_size).await; - assert!(payload_result.is_err()); + for prefetch in [0, blob_size / 2, blob_size, blob_size + 10] { + let mut reader = InvertedIndexFooterReader::new(&mut payload_buf, blob_size); + if prefetch > 0 { + reader = reader.with_prefetch_size(prefetch); + } + + let result = reader.metadata().await; + assert_matches!(result, Err(Error::UnexpectedOffsetSize { .. })); + } } } diff --git a/src/index/src/lib.rs b/src/index/src/lib.rs index 197fc01818c0..5e2e41166863 100644 --- a/src/index/src/lib.rs +++ b/src/index/src/lib.rs @@ -13,6 +13,7 @@ // limitations under the License. 
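// The `assert_matches` feature is enabled below because the new `InvertedIndexFooterReader`
// tests use `std::assert_matches::assert_matches!` to check the returned error variants.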
#![feature(iter_partition_in_place)] +#![feature(assert_matches)] pub mod fulltext_index; pub mod inverted_index; From d53fbcb9362892623da9a8d6475c82a4ac250faa Mon Sep 17 00:00:00 2001 From: Weny Xu Date: Thu, 12 Dec 2024 12:09:36 +0800 Subject: [PATCH 14/59] feat: introduce `PuffinMetadataCache` (#5148) * feat: introduce `PuffinMetadataCache` * refactor: remove too_many_arguments * chore: fmt toml --- Cargo.lock | 1 + src/mito2/src/cache.rs | 17 ++++++ src/mito2/src/config.rs | 4 ++ src/mito2/src/read/scan_region.rs | 11 +++- src/mito2/src/sst/file.rs | 1 + .../src/sst/index/inverted_index/applier.rs | 42 ++++++++++--- .../index/inverted_index/applier/builder.rs | 55 ++++++++++++----- .../inverted_index/applier/builder/between.rs | 10 ---- .../applier/builder/comparison.rs | 8 --- .../inverted_index/applier/builder/eq_list.rs | 14 ----- .../inverted_index/applier/builder/in_list.rs | 10 ---- .../applier/builder/regex_match.rs | 8 --- .../src/sst/index/inverted_index/creator.rs | 7 ++- src/mito2/src/worker.rs | 1 + src/puffin/Cargo.toml | 1 + src/puffin/src/blob_metadata.rs | 14 +++++ src/puffin/src/file_format/reader/file.rs | 5 ++ src/puffin/src/file_metadata.rs | 16 +++++ src/puffin/src/puffin_manager.rs | 1 + src/puffin/src/puffin_manager/cache.rs | 60 +++++++++++++++++++ .../src/puffin_manager/fs_puffin_manager.rs | 17 +++++- .../fs_puffin_manager/reader.rs | 39 ++++++++++-- 22 files changed, 258 insertions(+), 84 deletions(-) create mode 100644 src/puffin/src/puffin_manager/cache.rs diff --git a/Cargo.lock b/Cargo.lock index 311caafcb2fe..e57a6542afbb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8883,6 +8883,7 @@ dependencies = [ "lz4_flex 0.11.3", "moka", "pin-project", + "prometheus", "serde", "serde_json", "sha2", diff --git a/src/mito2/src/cache.rs b/src/mito2/src/cache.rs index 7d977a328ca1..7018b039d62e 100644 --- a/src/mito2/src/cache.rs +++ b/src/mito2/src/cache.rs @@ -32,6 +32,7 @@ use moka::notification::RemovalCause; use moka::sync::Cache; use parquet::column::page::Page; use parquet::file::metadata::ParquetMetaData; +use puffin::puffin_manager::cache::{PuffinMetadataCache, PuffinMetadataCacheRef}; use store_api::storage::{ConcreteDataType, RegionId, TimeSeriesRowSelector}; use crate::cache::cache_size::parquet_meta_size; @@ -68,6 +69,8 @@ pub struct CacheManager { write_cache: Option, /// Cache for inverted index. index_cache: Option, + /// Puffin metadata cache. + puffin_metadata_cache: Option, /// Cache for time series selectors. selector_result_cache: Option, } @@ -217,6 +220,10 @@ impl CacheManager { pub(crate) fn index_cache(&self) -> Option<&InvertedIndexCacheRef> { self.index_cache.as_ref() } + + pub(crate) fn puffin_metadata_cache(&self) -> Option<&PuffinMetadataCacheRef> { + self.puffin_metadata_cache.as_ref() + } } /// Increases selector cache miss metrics. @@ -237,6 +244,7 @@ pub struct CacheManagerBuilder { page_cache_size: u64, index_metadata_size: u64, index_content_size: u64, + puffin_metadata_size: u64, write_cache: Option, selector_result_cache_size: u64, } @@ -278,6 +286,12 @@ impl CacheManagerBuilder { self } + /// Sets cache size for puffin metadata. + pub fn puffin_metadata_size(mut self, bytes: u64) -> Self { + self.puffin_metadata_size = bytes; + self + } + /// Sets selector result cache size. 
pub fn selector_result_cache_size(mut self, bytes: u64) -> Self { self.selector_result_cache_size = bytes; @@ -340,6 +354,8 @@ impl CacheManagerBuilder { }); let inverted_index_cache = InvertedIndexCache::new(self.index_metadata_size, self.index_content_size); + let puffin_metadata_cache = + PuffinMetadataCache::new(self.puffin_metadata_size, &CACHE_BYTES); let selector_result_cache = (self.selector_result_cache_size != 0).then(|| { Cache::builder() .max_capacity(self.selector_result_cache_size) @@ -361,6 +377,7 @@ impl CacheManagerBuilder { page_cache, write_cache: self.write_cache, index_cache: Some(Arc::new(inverted_index_cache)), + puffin_metadata_cache: Some(Arc::new(puffin_metadata_cache)), selector_result_cache, } } diff --git a/src/mito2/src/config.rs b/src/mito2/src/config.rs index 9b113027a41b..dda3f4271059 100644 --- a/src/mito2/src/config.rs +++ b/src/mito2/src/config.rs @@ -304,6 +304,9 @@ pub struct IndexConfig { /// Write buffer size for creating the index. pub write_buffer_size: ReadableSize, + + /// Cache size for metadata of puffin files. Setting it to 0 to disable the cache. + pub metadata_cache_size: ReadableSize, } impl Default for IndexConfig { @@ -312,6 +315,7 @@ impl Default for IndexConfig { aux_path: String::new(), staging_size: ReadableSize::gb(2), write_buffer_size: ReadableSize::mb(8), + metadata_cache_size: ReadableSize::mb(64), } } } diff --git a/src/mito2/src/read/scan_region.rs b/src/mito2/src/read/scan_region.rs index 19324f119f3e..32b8c90cda02 100644 --- a/src/mito2/src/read/scan_region.rs +++ b/src/mito2/src/read/scan_region.rs @@ -413,11 +413,15 @@ impl ScanRegion { .and_then(|c| c.index_cache()) .cloned(); + let puffin_metadata_cache = self + .cache_manager + .as_ref() + .and_then(|c| c.puffin_metadata_cache()) + .cloned(); + InvertedIndexApplierBuilder::new( self.access_layer.region_dir().to_string(), self.access_layer.object_store().clone(), - file_cache, - index_cache, self.version.metadata.as_ref(), self.version.metadata.inverted_indexed_column_ids( self.version @@ -429,6 +433,9 @@ impl ScanRegion { ), self.access_layer.puffin_manager_factory().clone(), ) + .with_file_cache(file_cache) + .with_index_cache(index_cache) + .with_puffin_metadata_cache(puffin_metadata_cache) .build(&self.request.filters) .inspect_err(|err| warn!(err; "Failed to build invereted index applier")) .ok() diff --git a/src/mito2/src/sst/file.rs b/src/mito2/src/sst/file.rs index 451ec44f1cd2..4353ae55e3e9 100644 --- a/src/mito2/src/sst/file.rs +++ b/src/mito2/src/sst/file.rs @@ -149,6 +149,7 @@ impl FileMeta { pub fn inverted_index_available(&self) -> bool { self.available_indexes.contains(&IndexType::InvertedIndex) } + pub fn fulltext_index_available(&self) -> bool { self.available_indexes.contains(&IndexType::FulltextIndex) } diff --git a/src/mito2/src/sst/index/inverted_index/applier.rs b/src/mito2/src/sst/index/inverted_index/applier.rs index cac3ffedd74c..bf5206ef44be 100644 --- a/src/mito2/src/sst/index/inverted_index/applier.rs +++ b/src/mito2/src/sst/index/inverted_index/applier.rs @@ -22,6 +22,7 @@ use index::inverted_index::search::index_apply::{ ApplyOutput, IndexApplier, IndexNotFoundStrategy, SearchContext, }; use object_store::ObjectStore; +use puffin::puffin_manager::cache::PuffinMetadataCacheRef; use puffin::puffin_manager::{BlobGuard, PuffinManager, PuffinReader}; use snafu::ResultExt; use store_api::storage::RegionId; @@ -60,6 +61,9 @@ pub(crate) struct InvertedIndexApplier { /// In-memory cache for inverted index. 
inverted_index_cache: Option, + + /// Puffin metadata cache. + puffin_metadata_cache: Option, } pub(crate) type InvertedIndexApplierRef = Arc; @@ -70,8 +74,6 @@ impl InvertedIndexApplier { region_dir: String, region_id: RegionId, store: ObjectStore, - file_cache: Option, - index_cache: Option, index_applier: Box, puffin_manager_factory: PuffinManagerFactory, ) -> Self { @@ -81,13 +83,35 @@ impl InvertedIndexApplier { region_dir, region_id, store, - file_cache, + file_cache: None, index_applier, puffin_manager_factory, - inverted_index_cache: index_cache, + inverted_index_cache: None, + puffin_metadata_cache: None, } } + /// Sets the file cache. + pub fn with_file_cache(mut self, file_cache: Option) -> Self { + self.file_cache = file_cache; + self + } + + /// Sets the index cache. + pub fn with_index_cache(mut self, index_cache: Option) -> Self { + self.inverted_index_cache = index_cache; + self + } + + /// Sets the puffin metadata cache. + pub fn with_puffin_metadata_cache( + mut self, + puffin_metadata_cache: Option, + ) -> Self { + self.puffin_metadata_cache = puffin_metadata_cache; + self + } + /// Applies predicates to the provided SST file id and returns the relevant row group ids pub async fn apply(&self, file_id: FileId) -> Result { let _timer = INDEX_APPLY_ELAPSED @@ -105,6 +129,7 @@ impl InvertedIndexApplier { if let Err(err) = other { warn!(err; "An unexpected error occurred while reading the cached index file. Fallback to remote index file.") } + self.remote_blob_reader(file_id).await? } }; @@ -157,7 +182,10 @@ impl InvertedIndexApplier { /// Creates a blob reader from the remote index file. async fn remote_blob_reader(&self, file_id: FileId) -> Result { - let puffin_manager = self.puffin_manager_factory.build(self.store.clone()); + let puffin_manager = self + .puffin_manager_factory + .build(self.store.clone()) + .with_puffin_metadata_cache(self.puffin_metadata_cache.clone()); let file_path = location::index_file_path(&self.region_dir, file_id); puffin_manager .reader(&file_path) @@ -219,8 +247,6 @@ mod tests { region_dir.clone(), RegionId::new(0, 0), object_store, - None, - None, Box::new(mock_index_applier), puffin_manager_factory, ); @@ -261,8 +287,6 @@ mod tests { region_dir.clone(), RegionId::new(0, 0), object_store, - None, - None, Box::new(mock_index_applier), puffin_manager_factory, ); diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder.rs b/src/mito2/src/sst/index/inverted_index/applier/builder.rs index 603cf5aa23fd..653679b9fca8 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder.rs @@ -28,6 +28,7 @@ use datatypes::value::Value; use index::inverted_index::search::index_apply::PredicatesIndexApplier; use index::inverted_index::search::predicate::Predicate; use object_store::ObjectStore; +use puffin::puffin_manager::cache::PuffinMetadataCacheRef; use snafu::{OptionExt, ResultExt}; use store_api::metadata::RegionMetadata; use store_api::storage::ColumnId; @@ -65,6 +66,9 @@ pub(crate) struct InvertedIndexApplierBuilder<'a> { /// Cache for inverted index. index_cache: Option, + + /// Cache for puffin metadata. 
+ puffin_metadata_cache: Option, } impl<'a> InvertedIndexApplierBuilder<'a> { @@ -72,8 +76,6 @@ impl<'a> InvertedIndexApplierBuilder<'a> { pub fn new( region_dir: String, object_store: ObjectStore, - file_cache: Option, - index_cache: Option, metadata: &'a RegionMetadata, indexed_column_ids: HashSet, puffin_manager_factory: PuffinManagerFactory, @@ -81,15 +83,37 @@ impl<'a> InvertedIndexApplierBuilder<'a> { Self { region_dir, object_store, - file_cache, metadata, indexed_column_ids, output: HashMap::default(), - index_cache, puffin_manager_factory, + file_cache: None, + index_cache: None, + puffin_metadata_cache: None, } } + /// Sets the file cache. + pub fn with_file_cache(mut self, file_cache: Option) -> Self { + self.file_cache = file_cache; + self + } + + /// Sets the puffin metadata cache. + pub fn with_puffin_metadata_cache( + mut self, + puffin_metadata_cache: Option, + ) -> Self { + self.puffin_metadata_cache = puffin_metadata_cache; + self + } + + /// Sets the index cache. + pub fn with_index_cache(mut self, index_cache: Option) -> Self { + self.index_cache = index_cache; + self + } + /// Consumes the builder to construct an [`InvertedIndexApplier`], optionally returned based on /// the expressions provided. If no predicates match, returns `None`. pub fn build(mut self, exprs: &[Expr]) -> Result> { @@ -108,15 +132,18 @@ impl<'a> InvertedIndexApplierBuilder<'a> { .collect(); let applier = PredicatesIndexApplier::try_from(predicates); - Ok(Some(InvertedIndexApplier::new( - self.region_dir, - self.metadata.region_id, - self.object_store, - self.file_cache, - self.index_cache, - Box::new(applier.context(BuildIndexApplierSnafu)?), - self.puffin_manager_factory, - ))) + Ok(Some( + InvertedIndexApplier::new( + self.region_dir, + self.metadata.region_id, + self.object_store, + Box::new(applier.context(BuildIndexApplierSnafu)?), + self.puffin_manager_factory, + ) + .with_file_cache(self.file_cache) + .with_puffin_metadata_cache(self.puffin_metadata_cache) + .with_index_cache(self.index_cache), + )) } /// Recursively traverses expressions to collect predicates. 
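// For callers, the optional caches are now wired through the builder-style setters instead of
// positional `None` arguments, e.g. (an illustrative sketch; the `object_store`, `metadata`,
// `factory`, cache handles and `exprs` bindings are assumed to exist in the caller, as in
// `ScanRegion`):
//
//     let applier = InvertedIndexApplierBuilder::new(
//         region_dir,
//         object_store,
//         &metadata,
//         indexed_column_ids,
//         puffin_manager_factory,
//     )
//     .with_file_cache(file_cache)
//     .with_index_cache(index_cache)
//     .with_puffin_metadata_cache(puffin_metadata_cache)
//     .build(&exprs)?; // yields `Option<InvertedIndexApplier>`; `None` if no predicate matches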
@@ -322,8 +349,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/between.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/between.rs index 0a196e6f1ac6..51f7f001e25b 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/between.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/between.rs @@ -75,8 +75,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -118,8 +116,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -144,8 +140,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -187,8 +181,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -214,8 +206,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/comparison.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/comparison.rs index cdaec9f94e95..138b15b82eb9 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/comparison.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/comparison.rs @@ -231,8 +231,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -260,8 +258,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -280,8 +276,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -315,8 +309,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/eq_list.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/eq_list.rs index 1d07cca48724..35a5caad56a6 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/eq_list.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/eq_list.rs @@ -137,8 +137,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -175,8 +173,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -204,8 +200,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -224,8 +218,6 @@ mod tests { let mut builder = 
InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -244,8 +236,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -303,8 +293,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -341,8 +329,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/in_list.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/in_list.rs index 6a520ba401d3..224e10c452ff 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/in_list.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/in_list.rs @@ -68,8 +68,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -101,8 +99,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -126,8 +122,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -159,8 +153,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -186,8 +178,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, diff --git a/src/mito2/src/sst/index/inverted_index/applier/builder/regex_match.rs b/src/mito2/src/sst/index/inverted_index/applier/builder/regex_match.rs index 7fdf7f3de55c..7148986e6d11 100644 --- a/src/mito2/src/sst/index/inverted_index/applier/builder/regex_match.rs +++ b/src/mito2/src/sst/index/inverted_index/applier/builder/regex_match.rs @@ -62,8 +62,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -91,8 +89,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -120,8 +116,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, @@ -142,8 +136,6 @@ mod tests { let mut builder = InvertedIndexApplierBuilder::new( "test".to_string(), test_object_store(), - None, - None, &metadata, HashSet::from_iter([1, 2, 3]), facotry, diff --git a/src/mito2/src/sst/index/inverted_index/creator.rs b/src/mito2/src/sst/index/inverted_index/creator.rs index 6db1ef6e0b7b..029a0da8484f 100644 --- a/src/mito2/src/sst/index/inverted_index/creator.rs +++ b/src/mito2/src/sst/index/inverted_index/creator.rs @@ -310,12 +310,14 @@ mod tests { use futures::future::BoxFuture; use object_store::services::Memory; use object_store::ObjectStore; + use 
puffin::puffin_manager::cache::PuffinMetadataCache; use puffin::puffin_manager::PuffinManager; use store_api::metadata::{ColumnMetadata, RegionMetadataBuilder}; use store_api::storage::RegionId; use super::*; use crate::cache::index::InvertedIndexCache; + use crate::metrics::CACHE_BYTES; use crate::read::BatchColumn; use crate::row_converter::{McmpRowCodec, RowCodec, SortField}; use crate::sst::index::inverted_index::applier::builder::InvertedIndexApplierBuilder; @@ -447,15 +449,16 @@ mod tests { move |expr| { let _d = &d; let cache = Arc::new(InvertedIndexCache::new(10, 10)); + let puffin_metadata_cache = Arc::new(PuffinMetadataCache::new(10, &CACHE_BYTES)); let applier = InvertedIndexApplierBuilder::new( region_dir.clone(), object_store.clone(), - None, - Some(cache), ®ion_metadata, indexed_column_ids.clone(), factory.clone(), ) + .with_index_cache(Some(cache)) + .with_puffin_metadata_cache(Some(puffin_metadata_cache)) .build(&[expr]) .unwrap() .unwrap(); diff --git a/src/mito2/src/worker.rs b/src/mito2/src/worker.rs index 33d26c8196df..f8ab9c3f4edb 100644 --- a/src/mito2/src/worker.rs +++ b/src/mito2/src/worker.rs @@ -170,6 +170,7 @@ impl WorkerGroup { .selector_result_cache_size(config.selector_result_cache_size.as_bytes()) .index_metadata_size(config.inverted_index.metadata_cache_size.as_bytes()) .index_content_size(config.inverted_index.content_cache_size.as_bytes()) + .puffin_metadata_size(config.index.metadata_cache_size.as_bytes()) .write_cache(write_cache) .build(), ); diff --git a/src/puffin/Cargo.toml b/src/puffin/Cargo.toml index e4e6c74a5c9b..31c92ba4f972 100644 --- a/src/puffin/Cargo.toml +++ b/src/puffin/Cargo.toml @@ -25,6 +25,7 @@ futures.workspace = true lz4_flex = "0.11" moka = { workspace = true, features = ["future", "sync"] } pin-project.workspace = true +prometheus.workspace = true serde.workspace = true serde_json.workspace = true sha2 = "0.10.8" diff --git a/src/puffin/src/blob_metadata.rs b/src/puffin/src/blob_metadata.rs index bb2475bfa336..67eb62c5ff1b 100644 --- a/src/puffin/src/blob_metadata.rs +++ b/src/puffin/src/blob_metadata.rs @@ -68,6 +68,20 @@ pub struct BlobMetadata { pub properties: HashMap, } +impl BlobMetadata { + /// Calculates the memory usage of the blob metadata in bytes. + pub fn memory_usage(&self) -> usize { + self.blob_type.len() + + self.input_fields.len() * std::mem::size_of::() + + self + .properties + .iter() + .map(|(k, v)| k.len() + v.len()) + .sum::() + + std::mem::size_of::() + } +} + /// Compression codec used to compress the blob #[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)] #[serde(rename_all = "lowercase")] diff --git a/src/puffin/src/file_format/reader/file.rs b/src/puffin/src/file_format/reader/file.rs index 31e8e10bc4d5..9ed40a7f181e 100644 --- a/src/puffin/src/file_format/reader/file.rs +++ b/src/puffin/src/file_format/reader/file.rs @@ -46,6 +46,11 @@ impl PuffinFileReader { } } + pub fn with_metadata(mut self, metadata: Option) -> Self { + self.metadata = metadata; + self + } + fn validate_file_size(file_size: u64) -> Result<()> { ensure!( file_size >= MIN_FILE_SIZE, diff --git a/src/puffin/src/file_metadata.rs b/src/puffin/src/file_metadata.rs index 74eea3aa08f3..4804c65be495 100644 --- a/src/puffin/src/file_metadata.rs +++ b/src/puffin/src/file_metadata.rs @@ -33,6 +33,22 @@ pub struct FileMetadata { pub properties: HashMap, } +impl FileMetadata { + /// Calculates the memory usage of the file metadata in bytes. 
+ pub fn memory_usage(&self) -> usize { + self.blobs + .iter() + .map(|blob| blob.memory_usage()) + .sum::() + + self + .properties + .iter() + .map(|(k, v)| k.len() + v.len()) + .sum::() + + std::mem::size_of::() + } +} + #[cfg(test)] mod tests { use std::collections::HashMap; diff --git a/src/puffin/src/puffin_manager.rs b/src/puffin/src/puffin_manager.rs index 7bd5e9039d03..17101b1662e8 100644 --- a/src/puffin/src/puffin_manager.rs +++ b/src/puffin/src/puffin_manager.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +pub mod cache; pub mod file_accessor; pub mod fs_puffin_manager; pub mod stager; diff --git a/src/puffin/src/puffin_manager/cache.rs b/src/puffin/src/puffin_manager/cache.rs new file mode 100644 index 000000000000..66fcb36bf9c2 --- /dev/null +++ b/src/puffin/src/puffin_manager/cache.rs @@ -0,0 +1,60 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::sync::Arc; + +use prometheus::IntGaugeVec; + +use crate::file_metadata::FileMetadata; +/// Metrics for index metadata. +const PUFFIN_METADATA_TYPE: &str = "puffin_metadata"; + +pub type PuffinMetadataCacheRef = Arc; + +/// A cache for storing the metadata of the index files. +pub struct PuffinMetadataCache { + cache: moka::sync::Cache>, +} + +fn puffin_metadata_weight(k: &String, v: &Arc) -> u32 { + (k.as_bytes().len() + v.memory_usage()) as u32 +} + +impl PuffinMetadataCache { + pub fn new(capacity: u64, cache_bytes: &'static IntGaugeVec) -> Self { + common_telemetry::debug!("Building PuffinMetadataCache with capacity: {capacity}"); + Self { + cache: moka::sync::CacheBuilder::new(capacity) + .name("puffin_metadata") + .weigher(puffin_metadata_weight) + .eviction_listener(|k, v, _cause| { + let size = puffin_metadata_weight(&k, &v); + cache_bytes + .with_label_values(&[PUFFIN_METADATA_TYPE]) + .sub(size.into()); + }) + .build(), + } + } + + /// Gets the metadata from the cache. + pub fn get_metadata(&self, file_id: &str) -> Option> { + self.cache.get(file_id) + } + + /// Puts the metadata into the cache. + pub fn put_metadata(&self, file_id: String, metadata: Arc) { + self.cache.insert(file_id, metadata); + } +} diff --git a/src/puffin/src/puffin_manager/fs_puffin_manager.rs b/src/puffin/src/puffin_manager/fs_puffin_manager.rs index 976eb239979a..52190f92fb28 100644 --- a/src/puffin/src/puffin_manager/fs_puffin_manager.rs +++ b/src/puffin/src/puffin_manager/fs_puffin_manager.rs @@ -21,6 +21,7 @@ pub use reader::FsPuffinReader; pub use writer::FsPuffinWriter; use crate::error::Result; +use crate::puffin_manager::cache::PuffinMetadataCacheRef; use crate::puffin_manager::file_accessor::PuffinFileAccessor; use crate::puffin_manager::stager::Stager; use crate::puffin_manager::PuffinManager; @@ -31,16 +32,29 @@ pub struct FsPuffinManager { stager: S, /// The puffin file accessor. puffin_file_accessor: F, + /// The puffin metadata cache. 
+ puffin_metadata_cache: Option, } impl FsPuffinManager { - /// Creates a new `FsPuffinManager` with the specified `stager` and `puffin_file_accessor`. + /// Creates a new `FsPuffinManager` with the specified `stager` and `puffin_file_accessor`, + /// and optionally with a `puffin_metadata_cache`. pub fn new(stager: S, puffin_file_accessor: F) -> Self { Self { stager, puffin_file_accessor, + puffin_metadata_cache: None, } } + + /// Sets the puffin metadata cache. + pub fn with_puffin_metadata_cache( + mut self, + puffin_metadata_cache: Option, + ) -> Self { + self.puffin_metadata_cache = puffin_metadata_cache; + self + } } #[async_trait] @@ -57,6 +71,7 @@ where puffin_file_name.to_string(), self.stager.clone(), self.puffin_file_accessor.clone(), + self.puffin_metadata_cache.clone(), )) } diff --git a/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs b/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs index 3de27fdb77b0..2e1ae594adc6 100644 --- a/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs +++ b/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs @@ -14,6 +14,7 @@ use std::io; use std::ops::Range; +use std::sync::Arc; use async_compression::futures::bufread::ZstdDecoder; use async_trait::async_trait; @@ -23,12 +24,14 @@ use futures::io::BufReader; use futures::{AsyncRead, AsyncWrite}; use snafu::{ensure, OptionExt, ResultExt}; +use super::PuffinMetadataCacheRef; use crate::blob_metadata::{BlobMetadata, CompressionCodec}; use crate::error::{ BlobIndexOutOfBoundSnafu, BlobNotFoundSnafu, DeserializeJsonSnafu, FileKeyNotMatchSnafu, MetadataSnafu, ReadSnafu, Result, UnsupportedDecompressionSnafu, WriteSnafu, }; use crate::file_format::reader::{AsyncReader, PuffinFileReader}; +use crate::file_metadata::FileMetadata; use crate::partial_reader::PartialReader; use crate::puffin_manager::file_accessor::PuffinFileAccessor; use crate::puffin_manager::fs_puffin_manager::dir_meta::DirMetadata; @@ -45,14 +48,23 @@ pub struct FsPuffinReader { /// The puffin file accessor. puffin_file_accessor: F, + + /// The puffin file metadata cache. + puffin_file_metadata_cache: Option, } impl FsPuffinReader { - pub(crate) fn new(puffin_file_name: String, stager: S, puffin_file_accessor: F) -> Self { + pub(crate) fn new( + puffin_file_name: String, + stager: S, + puffin_file_accessor: F, + puffin_file_metadata_cache: Option, + ) -> Self { Self { puffin_file_name, stager, puffin_file_accessor, + puffin_file_metadata_cache, } } } @@ -73,13 +85,13 @@ where .await?; let mut file = PuffinFileReader::new(reader); - // TODO(zhongzc): cache the metadata. - let metadata = file.metadata().await?; + let metadata = self.get_puffin_file_metadata(&mut file).await?; let blob_metadata = metadata .blobs - .into_iter() + .iter() .find(|m| m.blob_type == key) - .context(BlobNotFoundSnafu { blob: key })?; + .context(BlobNotFoundSnafu { blob: key })? + .clone(); let blob = if blob_metadata.compression_codec.is_none() { // If the blob is not compressed, we can directly read it from the puffin file. 
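// (Above, the matching blob metadata is cloned out of `metadata` because it is now an
// `Arc<FileMetadata>` that may be shared with the puffin metadata cache, so it can no longer
// be moved out with `into_iter`.)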
@@ -133,6 +145,23 @@ where S: Stager, F: PuffinFileAccessor + Clone, { + async fn get_puffin_file_metadata( + &self, + reader: &mut PuffinFileReader, + ) -> Result> { + if let Some(cache) = self.puffin_file_metadata_cache.as_ref() { + if let Some(metadata) = cache.get_metadata(&self.puffin_file_name) { + return Ok(metadata); + } + } + + let metadata = Arc::new(reader.metadata().await?); + if let Some(cache) = self.puffin_file_metadata_cache.as_ref() { + cache.put_metadata(self.puffin_file_name.to_string(), metadata.clone()); + } + Ok(metadata) + } + async fn init_blob_to_stager( reader: PuffinFileReader, blob_metadata: BlobMetadata, From 03ad6e2a8dd8cc5632e433b94bb935fdd286c94c Mon Sep 17 00:00:00 2001 From: Yohan Wal Date: Thu, 12 Dec 2024 12:21:38 +0800 Subject: [PATCH 15/59] feat(fuzz): add alter table options for alter fuzzer (#5074) * feat(fuzz): add set table options to alter fuzzer * chore: clippy is happy, I'm sad * chore: happy ci happy * fix: unit test * feat(fuzz): add unset table options to alter fuzzer * fix: unit test * feat(fuzz): add table option validator * fix: make clippy happy * chore: add comments * chore: apply review comments * fix: unit test * feat(fuzz): add more ttl options * fix: #5108 * chore: add comments * chore: add comments --- Cargo.lock | 1 + src/common/base/src/readable_size.rs | 2 +- src/sql/src/statements/alter.rs | 21 +- tests-fuzz/Cargo.toml | 11 +- tests-fuzz/src/context.rs | 59 ++++- tests-fuzz/src/generator/alter_expr.rs | 143 +++++++++++- tests-fuzz/src/ir.rs | 2 +- tests-fuzz/src/ir/alter_expr.rs | 206 +++++++++++++++++- tests-fuzz/src/test_utils.rs | 1 + tests-fuzz/src/translator.rs | 1 + tests-fuzz/src/translator/common.rs | 67 ++++++ tests-fuzz/src/translator/mysql/alter_expr.rs | 67 +++++- .../src/translator/postgres/alter_expr.rs | 67 +++++- tests-fuzz/src/validator.rs | 1 + tests-fuzz/src/validator/table.rs | 103 +++++++++ .../{ => ddl}/fuzz_alter_logical_table.rs | 0 .../targets/{ => ddl}/fuzz_alter_table.rs | 58 ++++- .../targets/{ => ddl}/fuzz_create_database.rs | 0 .../{ => ddl}/fuzz_create_logical_table.rs | 0 .../targets/{ => ddl}/fuzz_create_table.rs | 0 20 files changed, 742 insertions(+), 68 deletions(-) create mode 100644 tests-fuzz/src/translator/common.rs create mode 100644 tests-fuzz/src/validator/table.rs rename tests-fuzz/targets/{ => ddl}/fuzz_alter_logical_table.rs (100%) rename tests-fuzz/targets/{ => ddl}/fuzz_alter_table.rs (72%) rename tests-fuzz/targets/{ => ddl}/fuzz_create_database.rs (100%) rename tests-fuzz/targets/{ => ddl}/fuzz_create_logical_table.rs (100%) rename tests-fuzz/targets/{ => ddl}/fuzz_create_table.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index e57a6542afbb..534b8c465ae6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -12197,6 +12197,7 @@ dependencies = [ "arbitrary", "async-trait", "chrono", + "common-base", "common-error", "common-macro", "common-query", diff --git a/src/common/base/src/readable_size.rs b/src/common/base/src/readable_size.rs index 21908526c72a..4298989291b8 100644 --- a/src/common/base/src/readable_size.rs +++ b/src/common/base/src/readable_size.rs @@ -19,7 +19,7 @@ pub const GIB: u64 = MIB * BINARY_DATA_MAGNITUDE; pub const TIB: u64 = GIB * BINARY_DATA_MAGNITUDE; pub const PIB: u64 = TIB * BINARY_DATA_MAGNITUDE; -#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd)] +#[derive(Clone, Copy, PartialEq, Eq, Ord, PartialOrd, Default)] pub struct ReadableSize(pub u64); impl ReadableSize { diff --git a/src/sql/src/statements/alter.rs b/src/sql/src/statements/alter.rs index 
174bdbbdc310..df148ae5b63d 100644 --- a/src/sql/src/statements/alter.rs +++ b/src/sql/src/statements/alter.rs @@ -72,29 +72,20 @@ pub enum AlterTableOperation { target_type: DataType, }, /// `SET =
` - SetTableOptions { - options: Vec, - }, - UnsetTableOptions { - keys: Vec, - }, + SetTableOptions { options: Vec }, + /// `UNSET
` + UnsetTableOptions { keys: Vec }, /// `DROP COLUMN ` - DropColumn { - name: Ident, - }, + DropColumn { name: Ident }, /// `RENAME ` - RenameTable { - new_table_name: String, - }, + RenameTable { new_table_name: String }, /// `MODIFY COLUMN SET FULLTEXT [WITH ]` SetColumnFulltext { column_name: Ident, options: FulltextOptions, }, /// `MODIFY COLUMN UNSET FULLTEXT` - UnsetColumnFulltext { - column_name: Ident, - }, + UnsetColumnFulltext { column_name: Ident }, } impl Display for AlterTableOperation { diff --git a/tests-fuzz/Cargo.toml b/tests-fuzz/Cargo.toml index cbac9df7133a..c408992bd508 100644 --- a/tests-fuzz/Cargo.toml +++ b/tests-fuzz/Cargo.toml @@ -18,6 +18,7 @@ unstable = ["nix"] arbitrary = { version = "1.3.0", features = ["derive"] } async-trait = { workspace = true } chrono = { workspace = true } +common-base = { workspace = true } common-error = { workspace = true } common-macro = { workspace = true } common-query = { workspace = true } @@ -67,14 +68,14 @@ dotenv.workspace = true [[bin]] name = "fuzz_create_table" -path = "targets/fuzz_create_table.rs" +path = "targets/ddl/fuzz_create_table.rs" test = false bench = false doc = false [[bin]] name = "fuzz_create_logical_table" -path = "targets/fuzz_create_logical_table.rs" +path = "targets/ddl/fuzz_create_logical_table.rs" test = false bench = false doc = false @@ -95,21 +96,21 @@ doc = false [[bin]] name = "fuzz_alter_table" -path = "targets/fuzz_alter_table.rs" +path = "targets/ddl/fuzz_alter_table.rs" test = false bench = false doc = false [[bin]] name = "fuzz_alter_logical_table" -path = "targets/fuzz_alter_logical_table.rs" +path = "targets/ddl/fuzz_alter_logical_table.rs" test = false bench = false doc = false [[bin]] name = "fuzz_create_database" -path = "targets/fuzz_create_database.rs" +path = "targets/ddl/fuzz_create_database.rs" test = false bench = false doc = false diff --git a/tests-fuzz/src/context.rs b/tests-fuzz/src/context.rs index 8cfd0ca9fa43..d0d5dee72dd7 100644 --- a/tests-fuzz/src/context.rs +++ b/tests-fuzz/src/context.rs @@ -21,7 +21,7 @@ use snafu::{ensure, OptionExt}; use crate::error::{self, Result}; use crate::generator::Random; -use crate::ir::alter_expr::AlterTableOperation; +use crate::ir::alter_expr::{AlterTableOperation, AlterTableOption}; use crate::ir::{AlterTableExpr, Column, CreateTableExpr, Ident}; pub type TableContextRef = Arc; @@ -35,6 +35,7 @@ pub struct TableContext { // GreptimeDB specific options pub partition: Option, pub primary_keys: Vec, + pub table_options: Vec, } impl From<&CreateTableExpr> for TableContext { @@ -52,6 +53,7 @@ impl From<&CreateTableExpr> for TableContext { columns: columns.clone(), partition: partition.clone(), primary_keys: primary_keys.clone(), + table_options: vec![], } } } @@ -64,7 +66,7 @@ impl TableContext { /// Applies the [AlterTableExpr]. 
pub fn alter(mut self, expr: AlterTableExpr) -> Result { - match expr.alter_options { + match expr.alter_kinds { AlterTableOperation::AddColumn { column, location } => { ensure!( !self.columns.iter().any(|col| col.name == column.name), @@ -140,6 +142,25 @@ impl TableContext { } Ok(self) } + AlterTableOperation::SetTableOptions { options } => { + for option in options { + if let Some(idx) = self + .table_options + .iter() + .position(|opt| opt.key() == option.key()) + { + self.table_options[idx] = option; + } else { + self.table_options.push(option); + } + } + Ok(self) + } + AlterTableOperation::UnsetTableOptions { keys } => { + self.table_options + .retain(|opt| !keys.contains(&opt.key().to_string())); + Ok(self) + } } } @@ -171,10 +192,11 @@ impl TableContext { #[cfg(test)] mod tests { use common_query::AddColumnLocation; + use common_time::Duration; use datatypes::data_type::ConcreteDataType; use super::TableContext; - use crate::ir::alter_expr::AlterTableOperation; + use crate::ir::alter_expr::{AlterTableOperation, AlterTableOption, Ttl}; use crate::ir::create_expr::ColumnOption; use crate::ir::{AlterTableExpr, Column, Ident}; @@ -185,11 +207,12 @@ mod tests { columns: vec![], partition: None, primary_keys: vec![], + table_options: vec![], }; // Add a column let expr = AlterTableExpr { table_name: "foo".into(), - alter_options: AlterTableOperation::AddColumn { + alter_kinds: AlterTableOperation::AddColumn { column: Column { name: "a".into(), column_type: ConcreteDataType::timestamp_microsecond_datatype(), @@ -205,7 +228,7 @@ mod tests { // Add a column at first let expr = AlterTableExpr { table_name: "foo".into(), - alter_options: AlterTableOperation::AddColumn { + alter_kinds: AlterTableOperation::AddColumn { column: Column { name: "b".into(), column_type: ConcreteDataType::timestamp_microsecond_datatype(), @@ -221,7 +244,7 @@ mod tests { // Add a column after "b" let expr = AlterTableExpr { table_name: "foo".into(), - alter_options: AlterTableOperation::AddColumn { + alter_kinds: AlterTableOperation::AddColumn { column: Column { name: "c".into(), column_type: ConcreteDataType::timestamp_microsecond_datatype(), @@ -239,10 +262,32 @@ mod tests { // Drop the column "b" let expr = AlterTableExpr { table_name: "foo".into(), - alter_options: AlterTableOperation::DropColumn { name: "b".into() }, + alter_kinds: AlterTableOperation::DropColumn { name: "b".into() }, }; let table_ctx = table_ctx.alter(expr).unwrap(); assert_eq!(table_ctx.columns[1].name, Ident::new("a")); assert_eq!(table_ctx.primary_keys, vec![0, 1]); + + // Set table options + let ttl_option = AlterTableOption::Ttl(Ttl::Duration(Duration::new_second(60))); + let expr = AlterTableExpr { + table_name: "foo".into(), + alter_kinds: AlterTableOperation::SetTableOptions { + options: vec![ttl_option.clone()], + }, + }; + let table_ctx = table_ctx.alter(expr).unwrap(); + assert_eq!(table_ctx.table_options.len(), 1); + assert_eq!(table_ctx.table_options[0], ttl_option); + + // Unset table options + let expr = AlterTableExpr { + table_name: "foo".into(), + alter_kinds: AlterTableOperation::UnsetTableOptions { + keys: vec![ttl_option.key().to_string()], + }, + }; + let table_ctx = table_ctx.alter(expr).unwrap(); + assert_eq!(table_ctx.table_options.len(), 0); } } diff --git a/tests-fuzz/src/generator/alter_expr.rs b/tests-fuzz/src/generator/alter_expr.rs index 03aed702fbad..0c5a62899953 100644 --- a/tests-fuzz/src/generator/alter_expr.rs +++ b/tests-fuzz/src/generator/alter_expr.rs @@ -14,17 +14,19 @@ use std::marker::PhantomData; +use 
common_base::readable_size::ReadableSize; use common_query::AddColumnLocation; use datatypes::data_type::ConcreteDataType; use derive_builder::Builder; use rand::Rng; use snafu::ensure; +use strum::IntoEnumIterator; use crate::context::TableContextRef; use crate::error::{self, Error, Result}; use crate::fake::WordGenerator; use crate::generator::{ColumnOptionGenerator, ConcreteDataTypeGenerator, Generator, Random}; -use crate::ir::alter_expr::{AlterTableExpr, AlterTableOperation}; +use crate::ir::alter_expr::{AlterTableExpr, AlterTableOperation, AlterTableOption, Ttl}; use crate::ir::create_expr::ColumnOption; use crate::ir::{ droppable_columns, generate_columns, generate_random_value, modifiable_columns, Column, @@ -107,7 +109,7 @@ impl Generator for AlterExprAddColumnGenera .remove(0); Ok(AlterTableExpr { table_name: self.table_ctx.name.clone(), - alter_options: AlterTableOperation::AddColumn { column, location }, + alter_kinds: AlterTableOperation::AddColumn { column, location }, }) } } @@ -130,7 +132,7 @@ impl Generator for AlterExprDropColumnGenerator { let name = droppable[rng.gen_range(0..droppable.len())].name.clone(); Ok(AlterTableExpr { table_name: self.table_ctx.name.clone(), - alter_options: AlterTableOperation::DropColumn { name }, + alter_kinds: AlterTableOperation::DropColumn { name }, }) } } @@ -153,7 +155,7 @@ impl Generator for AlterExprRenameGenerator { .generate_unique_table_name(rng, self.name_generator.as_ref()); Ok(AlterTableExpr { table_name: self.table_ctx.name.clone(), - alter_options: AlterTableOperation::RenameTable { new_table_name }, + alter_kinds: AlterTableOperation::RenameTable { new_table_name }, }) } } @@ -180,7 +182,7 @@ impl Generator for AlterExprModifyDataTypeGenerator Generator for AlterExprModifyDataTypeGenerator { + table_ctx: TableContextRef, + #[builder(default)] + _phantom: PhantomData, +} + +impl Generator for AlterExprSetTableOptionsGenerator { + type Error = Error; + + fn generate(&self, rng: &mut R) -> Result { + let all_options = AlterTableOption::iter().collect::>(); + // Generate random distinct options + let mut option_templates_idx = vec![]; + for _ in 1..rng.gen_range(2..=all_options.len()) { + let option = rng.gen_range(0..all_options.len()); + if !option_templates_idx.contains(&option) { + option_templates_idx.push(option); + } + } + let options = option_templates_idx + .iter() + .map(|idx| match all_options[*idx] { + AlterTableOption::Ttl(_) => { + let ttl_type = rng.gen_range(0..3); + match ttl_type { + 0 => { + let duration: u32 = rng.gen(); + AlterTableOption::Ttl(Ttl::Duration((duration as i64).into())) + } + 1 => AlterTableOption::Ttl(Ttl::Instant), + 2 => AlterTableOption::Ttl(Ttl::Forever), + _ => unreachable!(), + } + } + AlterTableOption::TwcsTimeWindow(_) => { + let time_window: u32 = rng.gen(); + AlterTableOption::TwcsTimeWindow((time_window as i64).into()) + } + AlterTableOption::TwcsMaxOutputFileSize(_) => { + let max_output_file_size: u64 = rng.gen(); + AlterTableOption::TwcsMaxOutputFileSize(ReadableSize(max_output_file_size)) + } + AlterTableOption::TwcsMaxInactiveWindowRuns(_) => { + let max_inactive_window_runs: u64 = rng.gen(); + AlterTableOption::TwcsMaxInactiveWindowRuns(max_inactive_window_runs) + } + AlterTableOption::TwcsMaxActiveWindowFiles(_) => { + let max_active_window_files: u64 = rng.gen(); + AlterTableOption::TwcsMaxActiveWindowFiles(max_active_window_files) + } + AlterTableOption::TwcsMaxActiveWindowRuns(_) => { + let max_active_window_runs: u64 = rng.gen(); + 
AlterTableOption::TwcsMaxActiveWindowRuns(max_active_window_runs) + } + AlterTableOption::TwcsMaxInactiveWindowFiles(_) => { + let max_inactive_window_files: u64 = rng.gen(); + AlterTableOption::TwcsMaxInactiveWindowFiles(max_inactive_window_files) + } + }) + .collect(); + Ok(AlterTableExpr { + table_name: self.table_ctx.name.clone(), + alter_kinds: AlterTableOperation::SetTableOptions { options }, + }) + } +} + +/// Generates the [AlterTableOperation::UnsetTableOptions] of [AlterTableExpr]. +#[derive(Builder)] +#[builder(pattern = "owned")] +pub struct AlterExprUnsetTableOptionsGenerator { + table_ctx: TableContextRef, + #[builder(default)] + _phantom: PhantomData, +} + +impl Generator for AlterExprUnsetTableOptionsGenerator { + type Error = Error; + + fn generate(&self, rng: &mut R) -> Result { + let all_options = AlterTableOption::iter().collect::>(); + // Generate random distinct options + let mut option_templates_idx = vec![]; + for _ in 1..rng.gen_range(2..=all_options.len()) { + let option = rng.gen_range(0..all_options.len()); + if !option_templates_idx.contains(&option) { + option_templates_idx.push(option); + } + } + let options = option_templates_idx + .iter() + .map(|idx| all_options[*idx].key().to_string()) + .collect(); + Ok(AlterTableExpr { + table_name: self.table_ctx.name.clone(), + alter_kinds: AlterTableOperation::UnsetTableOptions { keys: options }, + }) + } +} + #[cfg(test)] mod tests { use std::sync::Arc; @@ -220,7 +325,7 @@ mod tests { .generate(&mut rng) .unwrap(); let serialized = serde_json::to_string(&expr).unwrap(); - let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_options":{"AddColumn":{"column":{"name":{"value":"velit","quote_style":null},"column_type":{"Int32":{}},"options":[{"DefaultValue":{"Int32":1606462472}}]},"location":null}}}"#; + let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_kinds":{"AddColumn":{"column":{"name":{"value":"velit","quote_style":null},"column_type":{"Int32":{}},"options":[{"DefaultValue":{"Int32":1606462472}}]},"location":null}}}"#; assert_eq!(expected, serialized); let expr = AlterExprRenameGeneratorBuilder::default() @@ -230,7 +335,7 @@ mod tests { .generate(&mut rng) .unwrap(); let serialized = serde_json::to_string(&expr).unwrap(); - let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_options":{"RenameTable":{"new_table_name":{"value":"nihil","quote_style":null}}}}"#; + let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_kinds":{"RenameTable":{"new_table_name":{"value":"nihil","quote_style":null}}}}"#; assert_eq!(expected, serialized); let expr = AlterExprDropColumnGeneratorBuilder::default() @@ -240,17 +345,37 @@ mod tests { .generate(&mut rng) .unwrap(); let serialized = serde_json::to_string(&expr).unwrap(); - let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_options":{"DropColumn":{"name":{"value":"cUmquE","quote_style":null}}}}"#; + let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_kinds":{"DropColumn":{"name":{"value":"cUmquE","quote_style":null}}}}"#; assert_eq!(expected, serialized); let expr = AlterExprModifyDataTypeGeneratorBuilder::default() + .table_ctx(table_ctx.clone()) + .build() + .unwrap() + .generate(&mut rng) + .unwrap(); + let serialized = serde_json::to_string(&expr).unwrap(); + let expected = 
r#"{"table_name":{"value":"animI","quote_style":null},"alter_kinds":{"ModifyDataType":{"column":{"name":{"value":"toTAm","quote_style":null},"column_type":{"Int64":{}},"options":[]}}}}"#; + assert_eq!(expected, serialized); + + let expr = AlterExprSetTableOptionsGeneratorBuilder::default() + .table_ctx(table_ctx.clone()) + .build() + .unwrap() + .generate(&mut rng) + .unwrap(); + let serialized = serde_json::to_string(&expr).unwrap(); + let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_kinds":{"SetTableOptions":{"options":[{"TwcsMaxActiveWindowRuns":14908016120444947142},{"TwcsMaxActiveWindowFiles":5840340123887173415},{"TwcsMaxOutputFileSize":17740311466571102265}]}}}"#; + assert_eq!(expected, serialized); + + let expr = AlterExprUnsetTableOptionsGeneratorBuilder::default() .table_ctx(table_ctx) .build() .unwrap() .generate(&mut rng) .unwrap(); let serialized = serde_json::to_string(&expr).unwrap(); - let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_options":{"ModifyDataType":{"column":{"name":{"value":"toTAm","quote_style":null},"column_type":{"Int64":{}},"options":[]}}}}"#; + let expected = r#"{"table_name":{"value":"animI","quote_style":null},"alter_kinds":{"UnsetTableOptions":{"keys":["compaction.twcs.max_active_window_runs"]}}}"#; assert_eq!(expected, serialized); } } diff --git a/tests-fuzz/src/ir.rs b/tests-fuzz/src/ir.rs index b9d13ca9fba3..ae6edd595c85 100644 --- a/tests-fuzz/src/ir.rs +++ b/tests-fuzz/src/ir.rs @@ -24,7 +24,7 @@ use std::collections::HashMap; use std::sync::{Arc, Mutex}; use std::time::Duration; -pub use alter_expr::AlterTableExpr; +pub use alter_expr::{AlterTableExpr, AlterTableOption}; use common_time::timestamp::TimeUnit; use common_time::{Date, DateTime, Timestamp}; pub use create_expr::{CreateDatabaseExpr, CreateTableExpr}; diff --git a/tests-fuzz/src/ir/alter_expr.rs b/tests-fuzz/src/ir/alter_expr.rs index a9fdc18c2228..1d637ff6604c 100644 --- a/tests-fuzz/src/ir/alter_expr.rs +++ b/tests-fuzz/src/ir/alter_expr.rs @@ -12,16 +12,28 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::fmt::Display; +use std::str::FromStr; + +use common_base::readable_size::ReadableSize; use common_query::AddColumnLocation; +use common_time::{Duration, FOREVER, INSTANT}; use derive_builder::Builder; use serde::{Deserialize, Serialize}; +use store_api::mito_engine_options::{ + APPEND_MODE_KEY, COMPACTION_TYPE, TTL_KEY, TWCS_MAX_ACTIVE_WINDOW_FILES, + TWCS_MAX_ACTIVE_WINDOW_RUNS, TWCS_MAX_INACTIVE_WINDOW_FILES, TWCS_MAX_INACTIVE_WINDOW_RUNS, + TWCS_MAX_OUTPUT_FILE_SIZE, TWCS_TIME_WINDOW, +}; +use strum::EnumIter; +use crate::error::{self, Result}; use crate::ir::{Column, Ident}; #[derive(Debug, Builder, Clone, Serialize, Deserialize)] pub struct AlterTableExpr { pub table_name: Ident, - pub alter_options: AlterTableOperation, + pub alter_kinds: AlterTableOperation, } #[derive(Debug, Clone, Serialize, Deserialize)] @@ -37,4 +49,196 @@ pub enum AlterTableOperation { RenameTable { new_table_name: Ident }, /// `MODIFY COLUMN ` ModifyDataType { column: Column }, + /// `SET
<key> = <value>` + SetTableOptions { options: Vec<AlterTableOption> }, + /// `UNSET <key>
` + UnsetTableOptions { keys: Vec }, +} + +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, Default)] +pub enum Ttl { + Duration(Duration), + Instant, + #[default] + Forever, +} + +impl Display for Ttl { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Ttl::Duration(d) => write!(f, "{}", d), + Ttl::Instant => write!(f, "{}", INSTANT), + Ttl::Forever => write!(f, "{}", FOREVER), + } + } +} + +#[derive(Debug, EnumIter, Clone, Serialize, Deserialize, PartialEq, Eq)] +pub enum AlterTableOption { + Ttl(Ttl), + TwcsTimeWindow(Duration), + TwcsMaxOutputFileSize(ReadableSize), + TwcsMaxInactiveWindowFiles(u64), + TwcsMaxActiveWindowFiles(u64), + TwcsMaxInactiveWindowRuns(u64), + TwcsMaxActiveWindowRuns(u64), +} + +impl AlterTableOption { + pub fn key(&self) -> &str { + match self { + AlterTableOption::Ttl(_) => TTL_KEY, + AlterTableOption::TwcsTimeWindow(_) => TWCS_TIME_WINDOW, + AlterTableOption::TwcsMaxOutputFileSize(_) => TWCS_MAX_OUTPUT_FILE_SIZE, + AlterTableOption::TwcsMaxInactiveWindowFiles(_) => TWCS_MAX_INACTIVE_WINDOW_FILES, + AlterTableOption::TwcsMaxActiveWindowFiles(_) => TWCS_MAX_ACTIVE_WINDOW_FILES, + AlterTableOption::TwcsMaxInactiveWindowRuns(_) => TWCS_MAX_INACTIVE_WINDOW_RUNS, + AlterTableOption::TwcsMaxActiveWindowRuns(_) => TWCS_MAX_ACTIVE_WINDOW_RUNS, + } + } + + /// Parses the AlterTableOption from a key-value pair + fn parse_kv(key: &str, value: &str) -> Result { + match key { + TTL_KEY => { + let ttl = if value.to_lowercase() == INSTANT { + Ttl::Instant + } else if value.to_lowercase() == FOREVER { + Ttl::Forever + } else { + let duration = humantime::parse_duration(value).unwrap(); + Ttl::Duration(duration.into()) + }; + Ok(AlterTableOption::Ttl(ttl)) + } + TWCS_MAX_ACTIVE_WINDOW_RUNS => { + let runs = value.parse().unwrap(); + Ok(AlterTableOption::TwcsMaxActiveWindowRuns(runs)) + } + TWCS_MAX_ACTIVE_WINDOW_FILES => { + let files = value.parse().unwrap(); + Ok(AlterTableOption::TwcsMaxActiveWindowFiles(files)) + } + TWCS_MAX_INACTIVE_WINDOW_RUNS => { + let runs = value.parse().unwrap(); + Ok(AlterTableOption::TwcsMaxInactiveWindowRuns(runs)) + } + TWCS_MAX_INACTIVE_WINDOW_FILES => { + let files = value.parse().unwrap(); + Ok(AlterTableOption::TwcsMaxInactiveWindowFiles(files)) + } + TWCS_MAX_OUTPUT_FILE_SIZE => { + // may be "1M" instead of "1 MiB" + let value = if value.ends_with("B") { + value.to_string() + } else { + format!("{}B", value) + }; + let size = ReadableSize::from_str(&value).unwrap(); + Ok(AlterTableOption::TwcsMaxOutputFileSize(size)) + } + TWCS_TIME_WINDOW => { + let time = humantime::parse_duration(value).unwrap(); + Ok(AlterTableOption::TwcsTimeWindow(time.into())) + } + _ => error::UnexpectedSnafu { + violated: format!("Unknown table option key: {}", key), + } + .fail(), + } + } + + /// Parses the AlterTableOption from comma-separated string + pub fn parse_kv_pairs(option_string: &str) -> Result> { + let mut options = vec![]; + for pair in option_string.split(',') { + let pair = pair.trim(); + let (key, value) = pair.split_once('=').unwrap(); + let key = key.trim().replace("\'", ""); + let value = value.trim().replace('\'', ""); + // Currently we have only one compaction type, so we ignore it + // Cautious: COMPACTION_TYPE may be kept even if there are no compaction options enabled + if key == COMPACTION_TYPE || key == APPEND_MODE_KEY { + continue; + } else { + let option = AlterTableOption::parse_kv(&key, &value)?; + options.push(option); + } + } + Ok(options) + } +} + +impl Display for 
AlterTableOption { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + AlterTableOption::Ttl(d) => write!(f, "'{}' = '{}'", TTL_KEY, d), + AlterTableOption::TwcsTimeWindow(d) => write!(f, "'{}' = '{}'", TWCS_TIME_WINDOW, d), + AlterTableOption::TwcsMaxOutputFileSize(s) => { + // Caution: to_string loses precision for ReadableSize + write!(f, "'{}' = '{}'", TWCS_MAX_OUTPUT_FILE_SIZE, s) + } + AlterTableOption::TwcsMaxInactiveWindowFiles(u) => { + write!(f, "'{}' = '{}'", TWCS_MAX_INACTIVE_WINDOW_FILES, u) + } + AlterTableOption::TwcsMaxActiveWindowFiles(u) => { + write!(f, "'{}' = '{}'", TWCS_MAX_ACTIVE_WINDOW_FILES, u) + } + AlterTableOption::TwcsMaxInactiveWindowRuns(u) => { + write!(f, "'{}' = '{}'", TWCS_MAX_INACTIVE_WINDOW_RUNS, u) + } + AlterTableOption::TwcsMaxActiveWindowRuns(u) => { + write!(f, "'{}' = '{}'", TWCS_MAX_ACTIVE_WINDOW_RUNS, u) + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn test_parse_kv_pairs() { + let option_string = + "compaction.twcs.max_output_file_size = '1M', compaction.type = 'twcs', ttl = 'forever'"; + let options = AlterTableOption::parse_kv_pairs(option_string).unwrap(); + assert_eq!(options.len(), 2); + assert_eq!( + options, + vec![ + AlterTableOption::TwcsMaxOutputFileSize(ReadableSize::from_str("1MB").unwrap()), + AlterTableOption::Ttl(Ttl::Forever), + ] + ); + + let option_string = "compaction.twcs.max_active_window_files = '5030469694939972912', + compaction.twcs.max_active_window_runs = '8361168990283879099', + compaction.twcs.max_inactive_window_files = '6028716566907830876', + compaction.twcs.max_inactive_window_runs = '10622283085591494074', + compaction.twcs.max_output_file_size = '15686.4PiB', + compaction.twcs.time_window = '2061999256ms', + compaction.type = 'twcs', + ttl = '1month 3days 15h 49m 8s 279ms'"; + let options = AlterTableOption::parse_kv_pairs(option_string).unwrap(); + assert_eq!(options.len(), 7); + let expected = vec![ + AlterTableOption::TwcsMaxActiveWindowFiles(5030469694939972912), + AlterTableOption::TwcsMaxActiveWindowRuns(8361168990283879099), + AlterTableOption::TwcsMaxInactiveWindowFiles(6028716566907830876), + AlterTableOption::TwcsMaxInactiveWindowRuns(10622283085591494074), + AlterTableOption::TwcsMaxOutputFileSize(ReadableSize::from_str("15686.4PiB").unwrap()), + AlterTableOption::TwcsTimeWindow(Duration::new_nanosecond(2_061_999_256_000_000)), + AlterTableOption::Ttl(Ttl::Duration(Duration::new_millisecond( + // A month is 2_630_016 seconds + 2_630_016 * 1000 + + 3 * 24 * 60 * 60 * 1000 + + 15 * 60 * 60 * 1000 + + 49 * 60 * 1000 + + 8 * 1000 + + 279, + ))), + ]; + assert_eq!(options, expected); + } } diff --git a/tests-fuzz/src/test_utils.rs b/tests-fuzz/src/test_utils.rs index e65548969ac1..bef96a1fd7f9 100644 --- a/tests-fuzz/src/test_utils.rs +++ b/tests-fuzz/src/test_utils.rs @@ -55,5 +55,6 @@ pub fn new_test_ctx() -> TableContext { ], partition: None, primary_keys: vec![], + table_options: vec![], } } diff --git a/tests-fuzz/src/translator.rs b/tests-fuzz/src/translator.rs index 1745aa933601..673b543f2c0b 100644 --- a/tests-fuzz/src/translator.rs +++ b/tests-fuzz/src/translator.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
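A minimal round-trip sketch of the option encoding above, assuming the `tests_fuzz::ir::alter_expr` paths introduced in this patch; the expected strings mirror the translator and `parse_kv_pairs` tests elsewhere in this series.

    use common_time::Duration;
    use tests_fuzz::ir::alter_expr::{AlterTableOption, Ttl};

    #[test]
    fn option_display_and_parse_round_trip() {
        // `Display` renders each option as the quoted `'key' = 'value'` pair that the
        // ALTER TABLE ... SET translator joins with commas.
        let ttl = AlterTableOption::Ttl(Ttl::Duration(Duration::new_second(60)));
        assert_eq!(ttl.to_string(), "'ttl' = '60s'");
        assert_eq!(ttl.key(), "ttl");

        // `parse_kv_pairs` reads the WITH(...) clause of SHOW CREATE TABLE back into
        // options; the `compaction.type` entry is deliberately skipped.
        let parsed =
            AlterTableOption::parse_kv_pairs("compaction.type = 'twcs', ttl = 'forever'").unwrap();
        assert_eq!(parsed, vec![AlterTableOption::Ttl(Ttl::Forever)]);
    }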
+mod common; pub mod mysql; pub mod postgres; diff --git a/tests-fuzz/src/translator/common.rs b/tests-fuzz/src/translator/common.rs new file mode 100644 index 000000000000..2b968ed4391a --- /dev/null +++ b/tests-fuzz/src/translator/common.rs @@ -0,0 +1,67 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::Display; + +use super::DslTranslator; +use crate::error::{Error, Result}; +use crate::ir::alter_expr::AlterTableOperation; +use crate::ir::{AlterTableExpr, AlterTableOption}; + +/// Shared translator for `ALTER TABLE` operations. +pub(crate) struct CommonAlterTableTranslator; + +impl DslTranslator for CommonAlterTableTranslator { + type Error = Error; + + fn translate(&self, input: &AlterTableExpr) -> Result { + Ok(match &input.alter_kinds { + AlterTableOperation::DropColumn { name } => Self::format_drop(&input.table_name, name), + AlterTableOperation::SetTableOptions { options } => { + Self::format_set_table_options(&input.table_name, options) + } + AlterTableOperation::UnsetTableOptions { keys } => { + Self::format_unset_table_options(&input.table_name, keys) + } + _ => unimplemented!(), + }) + } +} + +impl CommonAlterTableTranslator { + fn format_drop(name: impl Display, column: impl Display) -> String { + format!("ALTER TABLE {name} DROP COLUMN {column};") + } + + fn format_set_table_options(name: impl Display, options: &[AlterTableOption]) -> String { + format!( + "ALTER TABLE {name} SET {};", + options + .iter() + .map(|option| option.to_string()) + .collect::>() + .join(", ") + ) + } + + fn format_unset_table_options(name: impl Display, keys: &[String]) -> String { + format!( + "ALTER TABLE {name} UNSET {};", + keys.iter() + .map(|key| format!("'{}'", key)) + .collect::>() + .join(", ") + ) + } +} diff --git a/tests-fuzz/src/translator/mysql/alter_expr.rs b/tests-fuzz/src/translator/mysql/alter_expr.rs index c973d7cb4b2a..3bf30b09a3ba 100644 --- a/tests-fuzz/src/translator/mysql/alter_expr.rs +++ b/tests-fuzz/src/translator/mysql/alter_expr.rs @@ -22,6 +22,7 @@ use crate::error::{Error, Result}; use crate::ir::alter_expr::AlterTableOperation; use crate::ir::create_expr::ColumnOption; use crate::ir::{AlterTableExpr, Column}; +use crate::translator::common::CommonAlterTableTranslator; use crate::translator::DslTranslator; pub struct AlterTableExprTranslator; @@ -30,26 +31,22 @@ impl DslTranslator for AlterTableExprTranslator { type Error = Error; fn translate(&self, input: &AlterTableExpr) -> Result { - Ok(match &input.alter_options { + Ok(match &input.alter_kinds { AlterTableOperation::AddColumn { column, location } => { Self::format_add_column(&input.table_name, column, location) } - AlterTableOperation::DropColumn { name } => Self::format_drop(&input.table_name, name), AlterTableOperation::RenameTable { new_table_name } => { Self::format_rename(&input.table_name, new_table_name) } AlterTableOperation::ModifyDataType { column } => { Self::format_modify_data_type(&input.table_name, column) } + _ => 
CommonAlterTableTranslator.translate(input)?, }) } } impl AlterTableExprTranslator { - fn format_drop(name: impl Display, column: impl Display) -> String { - format!("ALTER TABLE {name} DROP COLUMN {column};") - } - fn format_rename(name: impl Display, new_name: impl Display) -> String { format!("ALTER TABLE {name} RENAME {new_name};") } @@ -119,11 +116,15 @@ impl AlterTableExprTranslator { #[cfg(test)] mod tests { + use std::str::FromStr; + + use common_base::readable_size::ReadableSize; use common_query::AddColumnLocation; + use common_time::Duration; use datatypes::data_type::ConcreteDataType; use super::AlterTableExprTranslator; - use crate::ir::alter_expr::AlterTableOperation; + use crate::ir::alter_expr::{AlterTableOperation, AlterTableOption, Ttl}; use crate::ir::create_expr::ColumnOption; use crate::ir::{AlterTableExpr, Column}; use crate::translator::DslTranslator; @@ -132,7 +133,7 @@ mod tests { fn test_alter_table_expr() { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::AddColumn { + alter_kinds: AlterTableOperation::AddColumn { column: Column { name: "host".into(), column_type: ConcreteDataType::string_datatype(), @@ -150,7 +151,7 @@ mod tests { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::RenameTable { + alter_kinds: AlterTableOperation::RenameTable { new_table_name: "foo".into(), }, }; @@ -160,7 +161,7 @@ mod tests { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::DropColumn { name: "foo".into() }, + alter_kinds: AlterTableOperation::DropColumn { name: "foo".into() }, }; let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); @@ -168,7 +169,7 @@ mod tests { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::ModifyDataType { + alter_kinds: AlterTableOperation::ModifyDataType { column: Column { name: "host".into(), column_type: ConcreteDataType::string_datatype(), @@ -180,4 +181,48 @@ mod tests { let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); assert_eq!("ALTER TABLE test MODIFY COLUMN host STRING;", output); } + + #[test] + fn test_alter_table_expr_set_table_options() { + let alter_expr = AlterTableExpr { + table_name: "test".into(), + alter_kinds: AlterTableOperation::SetTableOptions { + options: vec![ + AlterTableOption::Ttl(Ttl::Duration(Duration::new_second(60))), + AlterTableOption::TwcsTimeWindow(Duration::new_second(60)), + AlterTableOption::TwcsMaxOutputFileSize(ReadableSize::from_str("1GB").unwrap()), + AlterTableOption::TwcsMaxActiveWindowFiles(10), + AlterTableOption::TwcsMaxActiveWindowRuns(10), + AlterTableOption::TwcsMaxInactiveWindowFiles(5), + AlterTableOption::TwcsMaxInactiveWindowRuns(5), + ], + }, + }; + + let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); + let expected = concat!( + "ALTER TABLE test SET 'ttl' = '60s', ", + "'compaction.twcs.time_window' = '60s', ", + "'compaction.twcs.max_output_file_size' = '1.0GiB', ", + "'compaction.twcs.max_active_window_files' = '10', ", + "'compaction.twcs.max_active_window_runs' = '10', ", + "'compaction.twcs.max_inactive_window_files' = '5', ", + "'compaction.twcs.max_inactive_window_runs' = '5';" + ); + assert_eq!(expected, output); + } + + #[test] + fn test_alter_table_expr_unset_table_options() { + let alter_expr = AlterTableExpr { + table_name: "test".into(), + alter_kinds: AlterTableOperation::UnsetTableOptions { + keys: vec!["ttl".into(), 
"compaction.twcs.time_window".into()], + }, + }; + + let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); + let expected = "ALTER TABLE test UNSET 'ttl', 'compaction.twcs.time_window';"; + assert_eq!(expected, output); + } } diff --git a/tests-fuzz/src/translator/postgres/alter_expr.rs b/tests-fuzz/src/translator/postgres/alter_expr.rs index 42db202efef0..f66ce0db923d 100644 --- a/tests-fuzz/src/translator/postgres/alter_expr.rs +++ b/tests-fuzz/src/translator/postgres/alter_expr.rs @@ -21,6 +21,7 @@ use crate::error::{Error, Result}; use crate::ir::alter_expr::AlterTableOperation; use crate::ir::create_expr::ColumnOption; use crate::ir::{AlterTableExpr, Column}; +use crate::translator::common::CommonAlterTableTranslator; use crate::translator::postgres::sql_data_type_to_postgres_data_type; use crate::translator::DslTranslator; @@ -30,26 +31,22 @@ impl DslTranslator for AlterTableExprTranslator { type Error = Error; fn translate(&self, input: &AlterTableExpr) -> Result { - Ok(match &input.alter_options { + Ok(match &input.alter_kinds { AlterTableOperation::AddColumn { column, .. } => { Self::format_add_column(&input.table_name, column) } - AlterTableOperation::DropColumn { name } => Self::format_drop(&input.table_name, name), AlterTableOperation::RenameTable { new_table_name } => { Self::format_rename(&input.table_name, new_table_name) } AlterTableOperation::ModifyDataType { column } => { Self::format_modify_data_type(&input.table_name, column) } + _ => CommonAlterTableTranslator.translate(input)?, }) } } impl AlterTableExprTranslator { - fn format_drop(name: impl Display, column: impl Display) -> String { - format!("ALTER TABLE {name} DROP COLUMN {column};") - } - fn format_rename(name: impl Display, new_name: impl Display) -> String { format!("ALTER TABLE {name} RENAME TO {new_name};") } @@ -116,11 +113,15 @@ impl AlterTableExprTranslator { #[cfg(test)] mod tests { + use std::str::FromStr; + + use common_base::readable_size::ReadableSize; use common_query::AddColumnLocation; + use common_time::Duration; use datatypes::data_type::ConcreteDataType; use super::AlterTableExprTranslator; - use crate::ir::alter_expr::AlterTableOperation; + use crate::ir::alter_expr::{AlterTableOperation, AlterTableOption, Ttl}; use crate::ir::create_expr::ColumnOption; use crate::ir::{AlterTableExpr, Column}; use crate::translator::DslTranslator; @@ -129,7 +130,7 @@ mod tests { fn test_alter_table_expr() { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::AddColumn { + alter_kinds: AlterTableOperation::AddColumn { column: Column { name: "host".into(), column_type: ConcreteDataType::string_datatype(), @@ -145,7 +146,7 @@ mod tests { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::RenameTable { + alter_kinds: AlterTableOperation::RenameTable { new_table_name: "foo".into(), }, }; @@ -155,7 +156,7 @@ mod tests { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::DropColumn { name: "foo".into() }, + alter_kinds: AlterTableOperation::DropColumn { name: "foo".into() }, }; let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); @@ -163,7 +164,7 @@ mod tests { let alter_expr = AlterTableExpr { table_name: "test".into(), - alter_options: AlterTableOperation::ModifyDataType { + alter_kinds: AlterTableOperation::ModifyDataType { column: Column { name: "host".into(), column_type: ConcreteDataType::string_datatype(), @@ -176,4 +177,48 @@ mod 
tests { // Ignores the location and primary key option. assert_eq!("ALTER TABLE test MODIFY COLUMN host STRING;", output); } + + #[test] + fn test_alter_table_expr_set_table_options() { + let alter_expr = AlterTableExpr { + table_name: "test".into(), + alter_kinds: AlterTableOperation::SetTableOptions { + options: vec![ + AlterTableOption::Ttl(Ttl::Duration(Duration::new_second(60))), + AlterTableOption::TwcsTimeWindow(Duration::new_second(60)), + AlterTableOption::TwcsMaxOutputFileSize(ReadableSize::from_str("1GB").unwrap()), + AlterTableOption::TwcsMaxActiveWindowFiles(10), + AlterTableOption::TwcsMaxActiveWindowRuns(10), + AlterTableOption::TwcsMaxInactiveWindowFiles(5), + AlterTableOption::TwcsMaxInactiveWindowRuns(5), + ], + }, + }; + + let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); + let expected = concat!( + "ALTER TABLE test SET 'ttl' = '60s', ", + "'compaction.twcs.time_window' = '60s', ", + "'compaction.twcs.max_output_file_size' = '1.0GiB', ", + "'compaction.twcs.max_active_window_files' = '10', ", + "'compaction.twcs.max_active_window_runs' = '10', ", + "'compaction.twcs.max_inactive_window_files' = '5', ", + "'compaction.twcs.max_inactive_window_runs' = '5';" + ); + assert_eq!(expected, output); + } + + #[test] + fn test_alter_table_expr_unset_table_options() { + let alter_expr = AlterTableExpr { + table_name: "test".into(), + alter_kinds: AlterTableOperation::UnsetTableOptions { + keys: vec!["ttl".into(), "compaction.twcs.time_window".into()], + }, + }; + + let output = AlterTableExprTranslator.translate(&alter_expr).unwrap(); + let expected = "ALTER TABLE test UNSET 'ttl', 'compaction.twcs.time_window';"; + assert_eq!(expected, output); + } } diff --git a/tests-fuzz/src/validator.rs b/tests-fuzz/src/validator.rs index cf2df9af229c..406dd66041a2 100644 --- a/tests-fuzz/src/validator.rs +++ b/tests-fuzz/src/validator.rs @@ -14,3 +14,4 @@ pub mod column; pub mod row; +pub mod table; diff --git a/tests-fuzz/src/validator/table.rs b/tests-fuzz/src/validator/table.rs new file mode 100644 index 000000000000..406719b2d660 --- /dev/null +++ b/tests-fuzz/src/validator/table.rs @@ -0,0 +1,103 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use snafu::{ensure, ResultExt}; +use sqlx::database::HasArguments; +use sqlx::{ColumnIndex, Database, Decode, Encode, Executor, IntoArguments, Row, Type}; + +use crate::error::{self, Result, UnexpectedSnafu}; +use crate::ir::alter_expr::AlterTableOption; + +/// Parses table options from the result of `SHOW CREATE TABLE` +/// An example of the result of `SHOW CREATE TABLE`: +/// +-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +/// | Table | Create Table | +/// +-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +/// | json | CREATE TABLE IF NOT EXISTS `json` (`ts` TIMESTAMP(3) NOT NULL, `j` JSON NULL, TIME INDEX (`ts`)) ENGINE=mito WITH(compaction.twcs.max_output_file_size = '1M', compaction.type = 'twcs', ttl = '1day') | +/// +-------+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +fn parse_show_create(show_create: &str) -> Result> { + if let Some(option_start) = show_create.find("WITH(") { + let option_end = { + let remain_str = &show_create[option_start..]; + if let Some(end) = remain_str.find(')') { + end + option_start + } else { + return UnexpectedSnafu { + violated: format!("Cannot find the end of the options in: {}", show_create), + } + .fail(); + } + }; + let options = &show_create[option_start + 5..option_end]; + Ok(AlterTableOption::parse_kv_pairs(options)?) + } else { + Ok(vec![]) + } +} + +/// Fetches table options from the context +pub async fn fetch_table_options<'a, DB, E>(e: E, sql: &'a str) -> Result> +where + DB: Database, + >::Arguments: IntoArguments<'a, DB>, + for<'c> E: 'a + Executor<'c, Database = DB>, + for<'c> String: Decode<'c, DB> + Type, + for<'c> String: Encode<'c, DB> + Type, + usize: ColumnIndex<::Row>, +{ + let fetched_rows = sqlx::query(sql) + .fetch_all(e) + .await + .context(error::ExecuteQuerySnafu { sql })?; + ensure!( + fetched_rows.len() == 1, + error::AssertSnafu { + reason: format!( + "Expected fetched row length: 1, got: {}", + fetched_rows.len(), + ) + } + ); + + let row = fetched_rows.first().unwrap(); + let show_create = row.try_get::(1).unwrap(); + parse_show_create(&show_create) +} + +#[cfg(test)] +mod tests { + use std::str::FromStr; + + use common_base::readable_size::ReadableSize; + use common_time::Duration; + + use super::*; + use crate::ir::alter_expr::Ttl; + use crate::ir::AlterTableOption; + + #[test] + fn test_parse_show_create() { + let show_create = "CREATE TABLE IF NOT EXISTS `json` (`ts` TIMESTAMP(3) NOT NULL, `j` JSON NULL, TIME INDEX (`ts`)) ENGINE=mito WITH(compaction.twcs.max_output_file_size = '1M', compaction.type = 'twcs', ttl = '1day')"; + let options = parse_show_create(show_create).unwrap(); + assert_eq!(options.len(), 2); + assert_eq!( + options[0], + AlterTableOption::TwcsMaxOutputFileSize(ReadableSize::from_str("1MB").unwrap()) + ); + assert_eq!( + options[1], + AlterTableOption::Ttl(Ttl::Duration(Duration::new_second(24 * 60 * 60))) + ); + } +} diff --git a/tests-fuzz/targets/fuzz_alter_logical_table.rs b/tests-fuzz/targets/ddl/fuzz_alter_logical_table.rs similarity index 100% rename from tests-fuzz/targets/fuzz_alter_logical_table.rs rename to 
tests-fuzz/targets/ddl/fuzz_alter_logical_table.rs diff --git a/tests-fuzz/targets/fuzz_alter_table.rs b/tests-fuzz/targets/ddl/fuzz_alter_table.rs similarity index 72% rename from tests-fuzz/targets/fuzz_alter_table.rs rename to tests-fuzz/targets/ddl/fuzz_alter_table.rs index 7f2a809c9e14..247d7632eeb5 100644 --- a/tests-fuzz/targets/fuzz_alter_table.rs +++ b/tests-fuzz/targets/ddl/fuzz_alter_table.rs @@ -34,10 +34,13 @@ use tests_fuzz::fake::{ use tests_fuzz::generator::alter_expr::{ AlterExprAddColumnGeneratorBuilder, AlterExprDropColumnGeneratorBuilder, AlterExprModifyDataTypeGeneratorBuilder, AlterExprRenameGeneratorBuilder, + AlterExprSetTableOptionsGeneratorBuilder, AlterExprUnsetTableOptionsGeneratorBuilder, }; use tests_fuzz::generator::create_expr::CreateTableExprGeneratorBuilder; use tests_fuzz::generator::Generator; -use tests_fuzz::ir::{droppable_columns, modifiable_columns, AlterTableExpr, CreateTableExpr}; +use tests_fuzz::ir::{ + droppable_columns, modifiable_columns, AlterTableExpr, AlterTableOption, CreateTableExpr, +}; use tests_fuzz::translator::mysql::alter_expr::AlterTableExprTranslator; use tests_fuzz::translator::mysql::create_expr::CreateTableExprTranslator; use tests_fuzz::translator::DslTranslator; @@ -62,11 +65,13 @@ struct FuzzInput { } #[derive(Debug, EnumIter)] -enum AlterTableOption { +enum AlterTableKind { AddColumn, DropColumn, RenameTable, ModifyDataType, + SetTableOptions, + UnsetTableOptions, } fn generate_create_table_expr(rng: &mut R) -> Result { @@ -93,23 +98,23 @@ fn generate_alter_table_expr( table_ctx: TableContextRef, rng: &mut R, ) -> Result { - let options = AlterTableOption::iter().collect::>(); - match options[rng.gen_range(0..options.len())] { - AlterTableOption::DropColumn if !droppable_columns(&table_ctx.columns).is_empty() => { + let kinds = AlterTableKind::iter().collect::>(); + match kinds[rng.gen_range(0..kinds.len())] { + AlterTableKind::DropColumn if !droppable_columns(&table_ctx.columns).is_empty() => { AlterExprDropColumnGeneratorBuilder::default() .table_ctx(table_ctx) .build() .unwrap() .generate(rng) } - AlterTableOption::ModifyDataType if !modifiable_columns(&table_ctx.columns).is_empty() => { + AlterTableKind::ModifyDataType if !modifiable_columns(&table_ctx.columns).is_empty() => { AlterExprModifyDataTypeGeneratorBuilder::default() .table_ctx(table_ctx) .build() .unwrap() .generate(rng) } - AlterTableOption::RenameTable => AlterExprRenameGeneratorBuilder::default() + AlterTableKind::RenameTable => AlterExprRenameGeneratorBuilder::default() .table_ctx(table_ctx) .name_generator(Box::new(MappedGenerator::new( WordGenerator, @@ -118,6 +123,20 @@ fn generate_alter_table_expr( .build() .unwrap() .generate(rng), + AlterTableKind::SetTableOptions => { + let expr_generator = AlterExprSetTableOptionsGeneratorBuilder::default() + .table_ctx(table_ctx) + .build() + .unwrap(); + expr_generator.generate(rng) + } + AlterTableKind::UnsetTableOptions => { + let expr_generator = AlterExprUnsetTableOptionsGeneratorBuilder::default() + .table_ctx(table_ctx) + .build() + .unwrap(); + expr_generator.generate(rng) + } _ => { let location = rng.gen_bool(0.5); let expr_generator = AlterExprAddColumnGeneratorBuilder::default() @@ -179,6 +198,31 @@ async fn execute_alter_table(ctx: FuzzContext, input: FuzzInput) -> Result<()> { let mut columns = table_ctx.columns.clone(); columns.sort_by(|a, b| a.name.value.cmp(&b.name.value)); validator::column::assert_eq(&column_entries, &columns)?; + + // Validates table options + let sql = format!("SHOW CREATE 
TABLE {}", table_ctx.name); + let mut table_options = validator::table::fetch_table_options(&ctx.greptime, &sql).await?; + table_options.sort_by(|a, b| a.key().cmp(b.key())); + let mut expected_table_options = table_ctx.table_options.clone(); + expected_table_options.sort_by(|a, b| a.key().cmp(b.key())); + table_options + .iter() + .zip(expected_table_options.iter()) + .for_each(|(a, b)| { + if let ( + AlterTableOption::TwcsMaxOutputFileSize(a), + AlterTableOption::TwcsMaxOutputFileSize(b), + ) = (a, b) + { + // to_string loses precision for ReadableSize, so the size in generated SQL is not the same as the size in the table context, + // but the string representation should be the same. For example: + // to_string() from_str() + // ReadableSize(13001360408898724524) ------------> "11547.5PiB" -----------> ReadableSize(13001329174265200640) + assert_eq!(a.to_string(), b.to_string()); + } else { + assert_eq!(a, b); + } + }); } // Cleans up diff --git a/tests-fuzz/targets/fuzz_create_database.rs b/tests-fuzz/targets/ddl/fuzz_create_database.rs similarity index 100% rename from tests-fuzz/targets/fuzz_create_database.rs rename to tests-fuzz/targets/ddl/fuzz_create_database.rs diff --git a/tests-fuzz/targets/fuzz_create_logical_table.rs b/tests-fuzz/targets/ddl/fuzz_create_logical_table.rs similarity index 100% rename from tests-fuzz/targets/fuzz_create_logical_table.rs rename to tests-fuzz/targets/ddl/fuzz_create_logical_table.rs diff --git a/tests-fuzz/targets/fuzz_create_table.rs b/tests-fuzz/targets/ddl/fuzz_create_table.rs similarity index 100% rename from tests-fuzz/targets/fuzz_create_table.rs rename to tests-fuzz/targets/ddl/fuzz_create_table.rs From 2137c53274d162f4a4131ca0d9b1d5a7bb9f155b Mon Sep 17 00:00:00 2001 From: Weny Xu Date: Thu, 12 Dec 2024 12:45:40 +0800 Subject: [PATCH 16/59] feat(index): add `file_size_hint` for remote blob reader (#5147) feat(index): add file_size_hint for remote blob reader --- src/common/base/src/range_read.rs | 17 +++++++++++++++ src/mito2/src/sst/file.rs | 20 ++++++++++++++++++ .../src/sst/index/inverted_index/applier.rs | 17 +++++++++------ .../src/sst/index/inverted_index/creator.rs | 2 +- src/mito2/src/sst/index/store.rs | 21 +++++++++++++++---- src/mito2/src/sst/parquet/reader.rs | 7 +++++-- src/puffin/src/partial_reader/async.rs | 4 ++++ src/puffin/src/puffin_manager.rs | 3 ++- .../fs_puffin_manager/reader.rs | 21 ++++++++++++++++++- 9 files changed, 97 insertions(+), 15 deletions(-) diff --git a/src/common/base/src/range_read.rs b/src/common/base/src/range_read.rs index 91f865d17ef6..61f28cb629fd 100644 --- a/src/common/base/src/range_read.rs +++ b/src/common/base/src/range_read.rs @@ -36,6 +36,11 @@ pub struct Metadata { /// `RangeReader` reads a range of bytes from a source. #[async_trait] pub trait RangeReader: Send + Unpin { + /// Sets the file size hint for the reader. + /// + /// It's used to optimize the reading process by reducing the number of remote requests. + fn with_file_size_hint(&mut self, file_size_hint: u64); + /// Returns the metadata of the source. 
async fn metadata(&mut self) -> io::Result; @@ -70,6 +75,10 @@ pub trait RangeReader: Send + Unpin { #[async_trait] impl RangeReader for &mut R { + fn with_file_size_hint(&mut self, file_size_hint: u64) { + (*self).with_file_size_hint(file_size_hint) + } + async fn metadata(&mut self) -> io::Result { (*self).metadata().await } @@ -186,6 +195,10 @@ impl AsyncRead for AsyncReadAdapter { #[async_trait] impl RangeReader for Vec { + fn with_file_size_hint(&mut self, _file_size_hint: u64) { + // do nothing + } + async fn metadata(&mut self) -> io::Result { Ok(Metadata { content_length: self.len() as u64, @@ -222,6 +235,10 @@ impl FileReader { #[async_trait] impl RangeReader for FileReader { + fn with_file_size_hint(&mut self, _file_size_hint: u64) { + // do nothing + } + async fn metadata(&mut self) -> io::Result { Ok(Metadata { content_length: self.content_length, diff --git a/src/mito2/src/sst/file.rs b/src/mito2/src/sst/file.rs index 4353ae55e3e9..5a9932ab433b 100644 --- a/src/mito2/src/sst/file.rs +++ b/src/mito2/src/sst/file.rs @@ -146,13 +146,33 @@ pub enum IndexType { } impl FileMeta { + /// Returns true if the file has an inverted index pub fn inverted_index_available(&self) -> bool { self.available_indexes.contains(&IndexType::InvertedIndex) } + /// Returns true if the file has a fulltext index pub fn fulltext_index_available(&self) -> bool { self.available_indexes.contains(&IndexType::FulltextIndex) } + + /// Returns the size of the inverted index file + pub fn inverted_index_size(&self) -> Option { + if self.available_indexes.len() == 1 && self.inverted_index_available() { + Some(self.index_file_size) + } else { + None + } + } + + /// Returns the size of the fulltext index file + pub fn fulltext_index_size(&self) -> Option { + if self.available_indexes.len() == 1 && self.fulltext_index_available() { + Some(self.index_file_size) + } else { + None + } + } } /// Handle to a SST file. diff --git a/src/mito2/src/sst/index/inverted_index/applier.rs b/src/mito2/src/sst/index/inverted_index/applier.rs index bf5206ef44be..d060d4bec17b 100644 --- a/src/mito2/src/sst/index/inverted_index/applier.rs +++ b/src/mito2/src/sst/index/inverted_index/applier.rs @@ -113,7 +113,7 @@ impl InvertedIndexApplier { } /// Applies predicates to the provided SST file id and returns the relevant row group ids - pub async fn apply(&self, file_id: FileId) -> Result { + pub async fn apply(&self, file_id: FileId, file_size_hint: Option) -> Result { let _timer = INDEX_APPLY_ELAPSED .with_label_values(&[TYPE_INVERTED_INDEX]) .start_timer(); @@ -129,8 +129,7 @@ impl InvertedIndexApplier { if let Err(err) = other { warn!(err; "An unexpected error occurred while reading the cached index file. Fallback to remote index file.") } - - self.remote_blob_reader(file_id).await? + self.remote_blob_reader(file_id, file_size_hint).await? } }; @@ -181,16 +180,22 @@ impl InvertedIndexApplier { } /// Creates a blob reader from the remote index file. - async fn remote_blob_reader(&self, file_id: FileId) -> Result { + async fn remote_blob_reader( + &self, + file_id: FileId, + file_size_hint: Option, + ) -> Result { let puffin_manager = self .puffin_manager_factory .build(self.store.clone()) .with_puffin_metadata_cache(self.puffin_metadata_cache.clone()); + let file_path = location::index_file_path(&self.region_dir, file_id); puffin_manager .reader(&file_path) .await .context(PuffinBuildReaderSnafu)? + .with_file_size_hint(file_size_hint) .blob(INDEX_BLOB_TYPE) .await .context(PuffinReadBlobSnafu)? 
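A hedged sketch of how the new hint is intended to be used by callers that already know the index file size (for example via `FileMeta::index_file_size`): set it on a `RangeReader` before reading metadata so no extra remote stat is needed. The helper name below is illustrative and not part of the patch.

    use common_base::range_read::RangeReader;

    /// Applies an optional size hint, then reads the content length. With a hint set,
    /// implementations such as `InstrumentedRangeReader` answer `metadata()` from the
    /// hint instead of issuing a `stat` call to the object store.
    async fn content_length_with_hint<R: RangeReader>(
        mut reader: R,
        file_size_hint: Option<u64>,
    ) -> std::io::Result<u64> {
        if let Some(hint) = file_size_hint {
            reader.with_file_size_hint(hint);
        }
        Ok(reader.metadata().await?.content_length)
    }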
@@ -250,7 +255,7 @@ mod tests { Box::new(mock_index_applier), puffin_manager_factory, ); - let output = sst_index_applier.apply(file_id).await.unwrap(); + let output = sst_index_applier.apply(file_id, None).await.unwrap(); assert_eq!( output, ApplyOutput { @@ -290,7 +295,7 @@ mod tests { Box::new(mock_index_applier), puffin_manager_factory, ); - let res = sst_index_applier.apply(file_id).await; + let res = sst_index_applier.apply(file_id, None).await; assert!(format!("{:?}", res.unwrap_err()).contains("Blob not found")); } } diff --git a/src/mito2/src/sst/index/inverted_index/creator.rs b/src/mito2/src/sst/index/inverted_index/creator.rs index 029a0da8484f..43cf54fa2811 100644 --- a/src/mito2/src/sst/index/inverted_index/creator.rs +++ b/src/mito2/src/sst/index/inverted_index/creator.rs @@ -464,7 +464,7 @@ mod tests { .unwrap(); Box::pin(async move { applier - .apply(sst_file_id) + .apply(sst_file_id, None) .await .unwrap() .matched_segment_ids diff --git a/src/mito2/src/sst/index/store.rs b/src/mito2/src/sst/index/store.rs index 2750c69fc249..7322bd4db496 100644 --- a/src/mito2/src/sst/index/store.rs +++ b/src/mito2/src/sst/index/store.rs @@ -68,6 +68,7 @@ impl InstrumentedStore { path: path.to_string(), read_byte_count, read_count, + file_size_hint: None, }) } @@ -262,15 +263,27 @@ pub(crate) struct InstrumentedRangeReader<'a> { path: String, read_byte_count: &'a IntCounter, read_count: &'a IntCounter, + file_size_hint: Option, } #[async_trait] impl RangeReader for InstrumentedRangeReader<'_> { + fn with_file_size_hint(&mut self, file_size_hint: u64) { + self.file_size_hint = Some(file_size_hint); + } + async fn metadata(&mut self) -> io::Result { - let stat = self.store.stat(&self.path).await?; - Ok(Metadata { - content_length: stat.content_length(), - }) + match self.file_size_hint { + Some(file_size_hint) => Ok(Metadata { + content_length: file_size_hint, + }), + None => { + let stat = self.store.stat(&self.path).await?; + Ok(Metadata { + content_length: stat.content_length(), + }) + } + } } async fn read(&mut self, range: Range) -> io::Result { diff --git a/src/mito2/src/sst/parquet/reader.rs b/src/mito2/src/sst/parquet/reader.rs index b73026a7a6e3..02c5c2cf3cba 100644 --- a/src/mito2/src/sst/parquet/reader.rs +++ b/src/mito2/src/sst/parquet/reader.rs @@ -475,8 +475,11 @@ impl ParquetReaderBuilder { if !self.file_handle.meta_ref().inverted_index_available() { return false; } - - let apply_output = match index_applier.apply(self.file_handle.file_id()).await { + let file_size_hint = self.file_handle.meta_ref().inverted_index_size(); + let apply_output = match index_applier + .apply(self.file_handle.file_id(), file_size_hint) + .await + { Ok(output) => output, Err(err) => { if cfg!(any(test, feature = "test")) { diff --git a/src/puffin/src/partial_reader/async.rs b/src/puffin/src/partial_reader/async.rs index 3de40cb3a190..4eedd1ee31f5 100644 --- a/src/puffin/src/partial_reader/async.rs +++ b/src/puffin/src/partial_reader/async.rs @@ -23,6 +23,10 @@ use crate::partial_reader::PartialReader; #[async_trait] impl RangeReader for PartialReader { + fn with_file_size_hint(&mut self, _file_size_hint: u64) { + // do nothing + } + async fn metadata(&mut self) -> io::Result { Ok(Metadata { content_length: self.size, diff --git a/src/puffin/src/puffin_manager.rs b/src/puffin/src/puffin_manager.rs index 17101b1662e8..204bc2c66e2e 100644 --- a/src/puffin/src/puffin_manager.rs +++ b/src/puffin/src/puffin_manager.rs @@ -73,11 +73,12 @@ pub struct PutOptions { /// The `PuffinReader` trait provides 
methods for reading blobs and directories from a Puffin file. #[async_trait] -#[auto_impl::auto_impl(Arc)] pub trait PuffinReader { type Blob: BlobGuard; type Dir: DirGuard; + fn with_file_size_hint(self, file_size_hint: Option) -> Self; + /// Reads a blob from the Puffin file. /// /// The returned `BlobGuard` is used to access the blob data. diff --git a/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs b/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs index 2e1ae594adc6..a5da2f75f858 100644 --- a/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs +++ b/src/puffin/src/puffin_manager/fs_puffin_manager/reader.rs @@ -43,6 +43,9 @@ pub struct FsPuffinReader { /// The name of the puffin file. puffin_file_name: String, + /// The file size hint. + file_size_hint: Option, + /// The stager. stager: S, @@ -62,6 +65,7 @@ impl FsPuffinReader { ) -> Self { Self { puffin_file_name, + file_size_hint: None, stager, puffin_file_accessor, puffin_file_metadata_cache, @@ -78,11 +82,19 @@ where type Blob = Either, S::Blob>; type Dir = S::Dir; + fn with_file_size_hint(mut self, file_size_hint: Option) -> Self { + self.file_size_hint = file_size_hint; + self + } + async fn blob(&self, key: &str) -> Result { - let reader = self + let mut reader = self .puffin_file_accessor .reader(&self.puffin_file_name) .await?; + if let Some(file_size_hint) = self.file_size_hint { + reader.with_file_size_hint(file_size_hint); + } let mut file = PuffinFileReader::new(reader); let metadata = self.get_puffin_file_metadata(&mut file).await?; @@ -303,6 +315,13 @@ where A: RangeReader, B: RangeReader, { + fn with_file_size_hint(&mut self, file_size_hint: u64) { + match self { + Either::L(a) => a.with_file_size_hint(file_size_hint), + Either::R(b) => b.with_file_size_hint(file_size_hint), + } + } + async fn metadata(&mut self) -> io::Result { match self { Either::L(a) => a.metadata().await, From b8a78b78389ae9edd6b3e4a05ee8697ad0c578a3 Mon Sep 17 00:00:00 2001 From: localhost Date: Thu, 12 Dec 2024 17:01:21 +0800 Subject: [PATCH 17/59] chore: decide tag column in log api follow table schema if table exists (#5138) * chore: decide tag column in log api follow table schema if table exists * chore: add more test for greptime_identity pipeline * chore: change pipeline get_table function signature * chore: change identity_pipeline_inner tag_column_names type --- src/frontend/src/instance/log_handler.rs | 15 ++- .../src/etl/transform/transformer/greptime.rs | 117 +++++++++++++++--- src/servers/src/http/event.rs | 13 +- src/servers/src/query_handler.rs | 8 +- 4 files changed, 130 insertions(+), 23 deletions(-) diff --git a/src/frontend/src/instance/log_handler.rs b/src/frontend/src/instance/log_handler.rs index c3422066a387..9ae782c7d4ab 100644 --- a/src/frontend/src/instance/log_handler.rs +++ b/src/frontend/src/instance/log_handler.rs @@ -25,8 +25,9 @@ use servers::error::{ }; use servers::interceptor::{LogIngestInterceptor, LogIngestInterceptorRef}; use servers::query_handler::PipelineHandler; -use session::context::QueryContextRef; +use session::context::{QueryContext, QueryContextRef}; use snafu::ResultExt; +use table::Table; use crate::instance::Instance; @@ -84,6 +85,18 @@ impl PipelineHandler for Instance { .await .context(PipelineSnafu) } + + async fn get_table( + &self, + table: &str, + query_ctx: &QueryContext, + ) -> std::result::Result>, catalog::error::Error> { + let catalog = query_ctx.current_catalog(); + let schema = query_ctx.current_schema(); + self.catalog_manager + .table(catalog, &schema, 
table, None) + .await + } } impl Instance { diff --git a/src/pipeline/src/etl/transform/transformer/greptime.rs b/src/pipeline/src/etl/transform/transformer/greptime.rs index 3b43696b5ab7..5d69a03ea23e 100644 --- a/src/pipeline/src/etl/transform/transformer/greptime.rs +++ b/src/pipeline/src/etl/transform/transformer/greptime.rs @@ -15,6 +15,7 @@ pub mod coerce; use std::collections::HashSet; +use std::sync::Arc; use ahash::HashMap; use api::helper::proto_value_type; @@ -367,20 +368,15 @@ fn json_value_to_row( Ok(Row { values: row }) } -/// Identity pipeline for Greptime -/// This pipeline will convert the input JSON array to Greptime Rows -/// 1. The pipeline will add a default timestamp column to the schema -/// 2. The pipeline not resolve NULL value -/// 3. The pipeline assumes that the json format is fixed -/// 4. The pipeline will return an error if the same column datatype is mismatched -/// 5. The pipeline will analyze the schema of each json record and merge them to get the final schema. -pub fn identity_pipeline(array: Vec) -> Result { +fn identity_pipeline_inner<'a>( + array: Vec, + tag_column_names: Option>, +) -> Result { let mut rows = Vec::with_capacity(array.len()); - - let mut schema = SchemaInfo::default(); + let mut schema_info = SchemaInfo::default(); for value in array { if let serde_json::Value::Object(map) = value { - let row = json_value_to_row(&mut schema, map)?; + let row = json_value_to_row(&mut schema_info, map)?; rows.push(row); } } @@ -395,7 +391,7 @@ pub fn identity_pipeline(array: Vec) -> Result { let ts = GreptimeValue { value_data: Some(ValueData::TimestampNanosecondValue(ns)), }; - let column_count = schema.schema.len(); + let column_count = schema_info.schema.len(); for row in rows.iter_mut() { let diff = column_count - row.values.len(); for _ in 0..diff { @@ -403,15 +399,49 @@ pub fn identity_pipeline(array: Vec) -> Result { } row.values.push(ts.clone()); } - schema.schema.push(greptime_timestamp_schema); + schema_info.schema.push(greptime_timestamp_schema); + + // set the semantic type of the row key column to Tag + if let Some(tag_column_names) = tag_column_names { + tag_column_names.for_each(|tag_column_name| { + if let Some(index) = schema_info.index.get(tag_column_name) { + schema_info.schema[*index].semantic_type = SemanticType::Tag as i32; + } + }); + } Ok(Rows { - schema: schema.schema, + schema: schema_info.schema, rows, }) } +/// Identity pipeline for Greptime +/// This pipeline will convert the input JSON array to Greptime Rows +/// params table is used to set the semantic type of the row key column to Tag +/// 1. The pipeline will add a default timestamp column to the schema +/// 2. The pipeline not resolve NULL value +/// 3. The pipeline assumes that the json format is fixed +/// 4. The pipeline will return an error if the same column datatype is mismatched +/// 5. The pipeline will analyze the schema of each json record and merge them to get the final schema. 
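A usage sketch for the signature defined next; `table` is whatever `PipelineHandler::get_table` returned, and the column names in the body are purely illustrative.

    use std::sync::Arc;

    fn build_rows(table: Option<Arc<table::Table>>) {
        let array = vec![
            serde_json::json!({"host": "h1", "latency_ms": 3}),
            serde_json::json!({"host": "h2", "latency_ms": 5}),
        ];
        // Two JSON keys plus the appended greptime timestamp column.
        let rows = pipeline::identity_pipeline(array, table).unwrap();
        assert_eq!(rows.schema.len(), 3);
        // When the table exists, "host" is additionally emitted as a Tag column
        // if it is one of the table's row-key columns.
    }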
+pub fn identity_pipeline( + array: Vec, + table: Option>, +) -> Result { + match table { + Some(table) => { + let table_info = table.table_info(); + let tag_column_names = table_info.meta.row_key_column_names(); + identity_pipeline_inner(array, Some(tag_column_names)) + } + None => identity_pipeline_inner(array, None::>), + } +} + #[cfg(test)] mod tests { + use api::v1::SemanticType; + + use crate::etl::transform::transformer::greptime::identity_pipeline_inner; use crate::identity_pipeline; #[test] @@ -437,7 +467,7 @@ mod tests { "gaga": "gaga" }), ]; - let rows = identity_pipeline(array); + let rows = identity_pipeline(array, None); assert!(rows.is_err()); assert_eq!( rows.err().unwrap().to_string(), @@ -465,7 +495,7 @@ mod tests { "gaga": "gaga" }), ]; - let rows = identity_pipeline(array); + let rows = identity_pipeline(array, None); assert!(rows.is_err()); assert_eq!( rows.err().unwrap().to_string(), @@ -493,7 +523,7 @@ mod tests { "gaga": "gaga" }), ]; - let rows = identity_pipeline(array); + let rows = identity_pipeline(array, None); assert!(rows.is_ok()); let rows = rows.unwrap(); assert_eq!(rows.schema.len(), 8); @@ -501,5 +531,58 @@ mod tests { assert_eq!(8, rows.rows[0].values.len()); assert_eq!(8, rows.rows[1].values.len()); } + { + let array = vec![ + serde_json::json!({ + "woshinull": null, + "name": "Alice", + "age": 20, + "is_student": true, + "score": 99.5, + "hobbies": "reading", + "address": "Beijing", + }), + serde_json::json!({ + "name": "Bob", + "age": 21, + "is_student": false, + "score": 88.5, + "hobbies": "swimming", + "address": "Shanghai", + "gaga": "gaga" + }), + ]; + let tag_column_names = ["name".to_string(), "address".to_string()]; + let rows = identity_pipeline_inner(array, Some(tag_column_names.iter())); + assert!(rows.is_ok()); + let rows = rows.unwrap(); + assert_eq!(rows.schema.len(), 8); + assert_eq!(rows.rows.len(), 2); + assert_eq!(8, rows.rows[0].values.len()); + assert_eq!(8, rows.rows[1].values.len()); + assert_eq!( + rows.schema + .iter() + .find(|x| x.column_name == "name") + .unwrap() + .semantic_type, + SemanticType::Tag as i32 + ); + assert_eq!( + rows.schema + .iter() + .find(|x| x.column_name == "address") + .unwrap() + .semantic_type, + SemanticType::Tag as i32 + ); + assert_eq!( + rows.schema + .iter() + .filter(|x| x.semantic_type == SemanticType::Tag as i32) + .count(), + 2 + ); + } } } diff --git a/src/servers/src/http/event.rs b/src/servers/src/http/event.rs index 69498c209ab4..5069db51975d 100644 --- a/src/servers/src/http/event.rs +++ b/src/servers/src/http/event.rs @@ -46,8 +46,8 @@ use session::context::{Channel, QueryContext, QueryContextRef}; use snafu::{ensure, OptionExt, ResultExt}; use crate::error::{ - DecodeOtlpRequestSnafu, Error, InvalidParameterSnafu, ParseJson5Snafu, ParseJsonSnafu, - PipelineSnafu, Result, UnsupportedContentTypeSnafu, + CatalogSnafu, DecodeOtlpRequestSnafu, Error, InvalidParameterSnafu, ParseJson5Snafu, + ParseJsonSnafu, PipelineSnafu, Result, UnsupportedContentTypeSnafu, }; use crate::http::extractor::LogTableName; use crate::http::header::CONTENT_TYPE_PROTOBUF_STR; @@ -612,10 +612,15 @@ async fn ingest_logs_inner( let mut results = Vec::with_capacity(pipeline_data.len()); let transformed_data: Rows; if pipeline_name == GREPTIME_INTERNAL_IDENTITY_PIPELINE_NAME { - let rows = pipeline::identity_pipeline(pipeline_data) + let table = state + .get_table(&table_name, &query_ctx) + .await + .context(CatalogSnafu)?; + let rows = pipeline::identity_pipeline(pipeline_data, table) 
.context(PipelineTransformSnafu) .context(PipelineSnafu)?; - transformed_data = rows; + + transformed_data = rows } else { let pipeline = state .get_pipeline(&pipeline_name, version, query_ctx.clone()) diff --git a/src/servers/src/query_handler.rs b/src/servers/src/query_handler.rs index 58812e9350bc..96a01593a8f1 100644 --- a/src/servers/src/query_handler.rs +++ b/src/servers/src/query_handler.rs @@ -39,7 +39,7 @@ use opentelemetry_proto::tonic::collector::metrics::v1::ExportMetricsServiceRequ use opentelemetry_proto::tonic::collector::trace::v1::ExportTraceServiceRequest; use pipeline::{GreptimeTransformer, Pipeline, PipelineInfo, PipelineVersion, PipelineWay}; use serde_json::Value; -use session::context::QueryContextRef; +use session::context::{QueryContext, QueryContextRef}; use crate::error::Result; use crate::influxdb::InfluxdbRequest; @@ -164,4 +164,10 @@ pub trait PipelineHandler { version: PipelineVersion, query_ctx: QueryContextRef, ) -> Result>; + + async fn get_table( + &self, + table: &str, + query_ctx: &QueryContext, + ) -> std::result::Result>, catalog::error::Error>; } From fee75a1fadfda2f98a496090158e99e4b93915f4 Mon Sep 17 00:00:00 2001 From: Yingwen Date: Thu, 12 Dec 2024 19:27:22 +0800 Subject: [PATCH 18/59] feat: collect reader metrics from prune reader (#5152) --- src/mito2/src/read/last_row.rs | 14 +++++++++++++- src/mito2/src/read/prune.rs | 16 +++++++++++++--- src/mito2/src/read/scan_util.rs | 5 +++-- src/mito2/src/sst/parquet/reader.rs | 4 ++-- 4 files changed, 31 insertions(+), 8 deletions(-) diff --git a/src/mito2/src/read/last_row.rs b/src/mito2/src/read/last_row.rs index 79d035e03271..1e2a6a5844c6 100644 --- a/src/mito2/src/read/last_row.rs +++ b/src/mito2/src/read/last_row.rs @@ -27,7 +27,7 @@ use crate::cache::{ use crate::error::Result; use crate::read::{Batch, BatchReader, BoxedBatchReader}; use crate::sst::file::FileId; -use crate::sst::parquet::reader::RowGroupReader; +use crate::sst::parquet::reader::{ReaderMetrics, RowGroupReader}; /// Reader to keep the last row for each time series. /// It assumes that batches from the input reader are @@ -115,6 +115,14 @@ impl RowGroupLastRowCachedReader { } } + /// Gets the underlying reader metrics if uncached. + pub(crate) fn metrics(&self) -> Option<&ReaderMetrics> { + match self { + RowGroupLastRowCachedReader::Hit(_) => None, + RowGroupLastRowCachedReader::Miss(reader) => Some(reader.metrics()), + } + } + /// Creates new Hit variant and updates metrics. fn new_hit(value: Arc) -> Self { selector_result_cache_hit(); @@ -234,6 +242,10 @@ impl RowGroupLastRowReader { }); cache.put_selector_result(self.key, value); } + + fn metrics(&self) -> &ReaderMetrics { + self.reader.metrics() + } } /// Push last row into `yielded_batches`. diff --git a/src/mito2/src/read/prune.rs b/src/mito2/src/read/prune.rs index cb0066e73472..500cd1430242 100644 --- a/src/mito2/src/read/prune.rs +++ b/src/mito2/src/read/prune.rs @@ -72,11 +72,21 @@ impl PruneReader { self.source = source; } - pub(crate) fn metrics(&mut self) -> &ReaderMetrics { + /// Merge metrics with the inner reader and return the merged metrics. 
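// Illustrative sketch (editorial, not part of this patch): returning an owned, merged
// `ReaderMetrics` lets callers aggregate over a shared borrow. Assuming `ReaderMetrics`
// implements `Default` (as its use elsewhere in the crate suggests), a caller can do:
let mut total = ReaderMetrics::default();
total.merge_from(&prune_reader.metrics()); // `prune_reader` is a hypothetical PruneReader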
+ pub(crate) fn metrics(&self) -> ReaderMetrics { + let mut metrics = self.metrics.clone(); match &self.source { - Source::RowGroup(r) => r.metrics(), - Source::LastRow(_) => &self.metrics, + Source::RowGroup(r) => { + metrics.merge_from(r.metrics()); + } + Source::LastRow(r) => { + if let Some(inner_metrics) = r.metrics() { + metrics.merge_from(inner_metrics); + } + } } + + metrics } pub(crate) async fn next_batch(&mut self) -> Result> { diff --git a/src/mito2/src/read/scan_util.rs b/src/mito2/src/read/scan_util.rs index df790d191a4e..0bdf62e77e03 100644 --- a/src/mito2/src/read/scan_util.rs +++ b/src/mito2/src/read/scan_util.rs @@ -181,8 +181,9 @@ pub(crate) fn scan_file_ranges( } yield batch; } - if let Source::PruneReader(mut reader) = source { - reader_metrics.merge_from(reader.metrics()); + if let Source::PruneReader(reader) = source { + let prune_metrics = reader.metrics(); + reader_metrics.merge_from(&prune_metrics); } } diff --git a/src/mito2/src/sst/parquet/reader.rs b/src/mito2/src/sst/parquet/reader.rs index 02c5c2cf3cba..335b09426eca 100644 --- a/src/mito2/src/sst/parquet/reader.rs +++ b/src/mito2/src/sst/parquet/reader.rs @@ -918,10 +918,10 @@ enum ReaderState { impl ReaderState { /// Returns the metrics of the reader. - fn metrics(&mut self) -> &ReaderMetrics { + fn metrics(&self) -> ReaderMetrics { match self { ReaderState::Readable(reader) => reader.metrics(), - ReaderState::Exhausted(m) => m, + ReaderState::Exhausted(m) => m.clone(), } } } From e8e95267389148fefb8422a61e33bd593a0359c3 Mon Sep 17 00:00:00 2001 From: localhost Date: Thu, 12 Dec 2024 19:47:21 +0800 Subject: [PATCH 19/59] chore: pipeline dryrun api can currently receives pipeline raw content (#5142) * chore: pipeline dryrun api can currently receives pipeline raw content * chore: remove dryrun v1 and add test * chore: change dryrun pipeline api body schema * chore: remove useless struct PipelineInfo * chore: update PipelineDryrunParams doc * chore: increase code readability * chore: add some comment for pipeline dryrun test * Apply suggestions from code review Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com> * chore: format code --------- Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com> --- src/frontend/src/instance/log_handler.rs | 5 + src/pipeline/benches/processor.rs | 2 +- src/pipeline/src/etl.rs | 18 +- src/pipeline/src/manager/pipeline_operator.rs | 5 + src/pipeline/src/manager/table.rs | 2 +- src/pipeline/tests/common.rs | 2 +- src/pipeline/tests/dissect.rs | 2 +- src/pipeline/tests/pipeline.rs | 10 +- src/servers/src/http/event.rs | 142 +++++++--- src/servers/src/query_handler.rs | 3 + tests-integration/tests/http.rs | 253 ++++++++++++------ 11 files changed, 304 insertions(+), 140 deletions(-) diff --git a/src/frontend/src/instance/log_handler.rs b/src/frontend/src/instance/log_handler.rs index 9ae782c7d4ab..2da2d6717d3b 100644 --- a/src/frontend/src/instance/log_handler.rs +++ b/src/frontend/src/instance/log_handler.rs @@ -19,6 +19,7 @@ use async_trait::async_trait; use auth::{PermissionChecker, PermissionCheckerRef, PermissionReq}; use client::Output; use common_error::ext::BoxedError; +use pipeline::pipeline_operator::PipelineOperator; use pipeline::{GreptimeTransformer, Pipeline, PipelineInfo, PipelineVersion}; use servers::error::{ AuthSnafu, Error as ServerError, ExecuteGrpcRequestSnafu, PipelineSnafu, Result as ServerResult, @@ -97,6 +98,10 @@ impl PipelineHandler for Instance { .table(catalog, &schema, table, None) .await } + + fn 
build_pipeline(&self, pipeline: &str) -> ServerResult> { + PipelineOperator::build_pipeline(pipeline).context(PipelineSnafu) + } } impl Instance { diff --git a/src/pipeline/benches/processor.rs b/src/pipeline/benches/processor.rs index 09462753d892..8cf221af5b10 100644 --- a/src/pipeline/benches/processor.rs +++ b/src/pipeline/benches/processor.rs @@ -223,7 +223,7 @@ transform: type: uint32 "#; - parse(&Content::Yaml(pipeline_yaml.into())).unwrap() + parse(&Content::Yaml(pipeline_yaml)).unwrap() } fn criterion_benchmark(c: &mut Criterion) { diff --git a/src/pipeline/src/etl.rs b/src/pipeline/src/etl.rs index 9bd47a899ec6..45feb4b02ff6 100644 --- a/src/pipeline/src/etl.rs +++ b/src/pipeline/src/etl.rs @@ -37,9 +37,9 @@ const PROCESSORS: &str = "processors"; const TRANSFORM: &str = "transform"; const TRANSFORMS: &str = "transforms"; -pub enum Content { - Json(String), - Yaml(String), +pub enum Content<'a> { + Json(&'a str), + Yaml(&'a str), } pub fn parse(input: &Content) -> Result> @@ -379,8 +379,7 @@ transform: - field: field2 type: uint32 "#; - let pipeline: Pipeline = - parse(&Content::Yaml(pipeline_yaml.into())).unwrap(); + let pipeline: Pipeline = parse(&Content::Yaml(pipeline_yaml)).unwrap(); let mut payload = pipeline.init_intermediate_state(); pipeline.prepare(input_value, &mut payload).unwrap(); assert_eq!(&["my_field"].to_vec(), pipeline.required_keys()); @@ -432,8 +431,7 @@ transform: - field: ts type: timestamp, ns index: time"#; - let pipeline: Pipeline = - parse(&Content::Yaml(pipeline_str.into())).unwrap(); + let pipeline: Pipeline = parse(&Content::Yaml(pipeline_str)).unwrap(); let mut payload = pipeline.init_intermediate_state(); pipeline .prepare(serde_json::Value::String(message), &mut payload) @@ -509,8 +507,7 @@ transform: type: uint32 "#; - let pipeline: Pipeline = - parse(&Content::Yaml(pipeline_yaml.into())).unwrap(); + let pipeline: Pipeline = parse(&Content::Yaml(pipeline_yaml)).unwrap(); let mut payload = pipeline.init_intermediate_state(); pipeline.prepare(input_value, &mut payload).unwrap(); assert_eq!(&["my_field"].to_vec(), pipeline.required_keys()); @@ -554,8 +551,7 @@ transform: index: time "#; - let pipeline: Pipeline = - parse(&Content::Yaml(pipeline_yaml.into())).unwrap(); + let pipeline: Pipeline = parse(&Content::Yaml(pipeline_yaml)).unwrap(); let schema = pipeline.schemas().clone(); let mut result = pipeline.init_intermediate_state(); pipeline.prepare(input_value, &mut result).unwrap(); diff --git a/src/pipeline/src/manager/pipeline_operator.rs b/src/pipeline/src/manager/pipeline_operator.rs index 2e838144a483..4f43b89e2e74 100644 --- a/src/pipeline/src/manager/pipeline_operator.rs +++ b/src/pipeline/src/manager/pipeline_operator.rs @@ -243,4 +243,9 @@ impl PipelineOperator { }) .await } + + /// Compile a pipeline. + pub fn build_pipeline(pipeline: &str) -> Result> { + PipelineTable::compile_pipeline(pipeline) + } } diff --git a/src/pipeline/src/manager/table.rs b/src/pipeline/src/manager/table.rs index 7b3719b66707..c2a36c63ec6d 100644 --- a/src/pipeline/src/manager/table.rs +++ b/src/pipeline/src/manager/table.rs @@ -203,7 +203,7 @@ impl PipelineTable { /// Compile a pipeline from a string. 
pub fn compile_pipeline(pipeline: &str) -> Result> { - let yaml_content = Content::Yaml(pipeline.into()); + let yaml_content = Content::Yaml(pipeline); parse::(&yaml_content).context(CompilePipelineSnafu) } diff --git a/src/pipeline/tests/common.rs b/src/pipeline/tests/common.rs index aa96d14d5591..d825c91e4cb3 100644 --- a/src/pipeline/tests/common.rs +++ b/src/pipeline/tests/common.rs @@ -19,7 +19,7 @@ use pipeline::{parse, Content, GreptimeTransformer, Pipeline}; pub fn parse_and_exec(input_str: &str, pipeline_yaml: &str) -> Rows { let input_value = serde_json::from_str::(input_str).unwrap(); - let yaml_content = Content::Yaml(pipeline_yaml.into()); + let yaml_content = Content::Yaml(pipeline_yaml); let pipeline: Pipeline = parse(&yaml_content).expect("failed to parse pipeline"); let mut result = pipeline.init_intermediate_state(); diff --git a/src/pipeline/tests/dissect.rs b/src/pipeline/tests/dissect.rs index 7577d58080c7..56386d0e860a 100644 --- a/src/pipeline/tests/dissect.rs +++ b/src/pipeline/tests/dissect.rs @@ -270,7 +270,7 @@ transform: let input_value = serde_json::from_str::(input_str).unwrap(); - let yaml_content = pipeline::Content::Yaml(pipeline_yaml.into()); + let yaml_content = pipeline::Content::Yaml(pipeline_yaml); let pipeline: pipeline::Pipeline = pipeline::parse(&yaml_content).expect("failed to parse pipeline"); let mut result = pipeline.init_intermediate_state(); diff --git a/src/pipeline/tests/pipeline.rs b/src/pipeline/tests/pipeline.rs index e68c7b9e6a6e..de724e1a27d2 100644 --- a/src/pipeline/tests/pipeline.rs +++ b/src/pipeline/tests/pipeline.rs @@ -417,7 +417,7 @@ transform: .map(|(_, d)| GreptimeValue { value_data: d }) .collect::>(); - let yaml_content = Content::Yaml(pipeline_yaml.into()); + let yaml_content = Content::Yaml(pipeline_yaml); let pipeline: Pipeline = parse(&yaml_content).expect("failed to parse pipeline"); let mut stats = pipeline.init_intermediate_state(); @@ -487,7 +487,7 @@ transform: type: json "#; - let yaml_content = Content::Yaml(pipeline_yaml.into()); + let yaml_content = Content::Yaml(pipeline_yaml); let pipeline: Pipeline = parse(&yaml_content).unwrap(); let mut status = pipeline.init_intermediate_state(); @@ -592,7 +592,7 @@ transform: type: json "#; - let yaml_content = Content::Yaml(pipeline_yaml.into()); + let yaml_content = Content::Yaml(pipeline_yaml); let pipeline: Pipeline = parse(&yaml_content).unwrap(); let mut status = pipeline.init_intermediate_state(); @@ -655,7 +655,7 @@ transform: index: timestamp "#; - let yaml_content = Content::Yaml(pipeline_yaml.into()); + let yaml_content = Content::Yaml(pipeline_yaml); let pipeline: Pipeline = parse(&yaml_content).unwrap(); let mut status = pipeline.init_intermediate_state(); @@ -691,7 +691,7 @@ transform: - message type: string "#; - let yaml_content = Content::Yaml(pipeline_yaml.into()); + let yaml_content = Content::Yaml(pipeline_yaml); let pipeline: Pipeline = parse(&yaml_content).unwrap(); let mut status = pipeline.init_intermediate_state(); diff --git a/src/servers/src/http/event.rs b/src/servers/src/http/event.rs index 5069db51975d..b6b520627d66 100644 --- a/src/servers/src/http/event.rs +++ b/src/servers/src/http/event.rs @@ -38,7 +38,7 @@ use lazy_static::lazy_static; use loki_api::prost_types::Timestamp; use pipeline::error::PipelineTransformSnafu; use pipeline::util::to_pipeline_version; -use pipeline::PipelineVersion; +use pipeline::{GreptimeTransformer, PipelineVersion}; use prost::Message; use serde::{Deserialize, Serialize}; use serde_json::{Deserializer, Map, 
Value}; @@ -276,39 +276,11 @@ fn transform_ndjson_array_factory( }) } -#[axum_macros::debug_handler] -pub async fn pipeline_dryrun( - State(log_state): State, - Query(query_params): Query, - Extension(mut query_ctx): Extension, - TypedHeader(content_type): TypedHeader, - payload: String, +/// Dryrun pipeline with given data +fn dryrun_pipeline_inner( + value: Vec, + pipeline: &pipeline::Pipeline, ) -> Result { - let handler = log_state.log_handler; - let pipeline_name = query_params.pipeline_name.context(InvalidParameterSnafu { - reason: "pipeline_name is required", - })?; - - let version = to_pipeline_version(query_params.version).context(PipelineSnafu)?; - - let ignore_errors = query_params.ignore_errors.unwrap_or(false); - - let value = extract_pipeline_value_by_content_type(content_type, payload, ignore_errors)?; - - ensure!( - value.len() <= 10, - InvalidParameterSnafu { - reason: "too many rows for dryrun", - } - ); - - query_ctx.set_channel(Channel::Http); - let query_ctx = Arc::new(query_ctx); - - let pipeline = handler - .get_pipeline(&pipeline_name, version, query_ctx.clone()) - .await?; - let mut intermediate_state = pipeline.init_intermediate_state(); let mut results = Vec::with_capacity(value.len()); @@ -387,6 +359,110 @@ pub async fn pipeline_dryrun( Ok(Json(result).into_response()) } +/// Dryrun pipeline with given data +/// pipeline_name and pipeline_version to specify pipeline stored in db +/// pipeline to specify pipeline raw content +/// data to specify data +/// data maght be list of string or list of object +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct PipelineDryrunParams { + pub pipeline_name: Option, + pub pipeline_version: Option, + pub pipeline: Option, + pub data: Vec, +} + +/// Check if the payload is valid json +/// Check if the payload contains pipeline or pipeline_name and data +/// Return Some if valid, None if invalid +fn check_pipeline_dryrun_params_valid(payload: &str) -> Option { + match serde_json::from_str::(payload) { + // payload with pipeline or pipeline_name and data is array + Ok(params) if params.pipeline.is_some() || params.pipeline_name.is_some() => Some(params), + // because of the pipeline_name or pipeline is required + Ok(_) => None, + // invalid json + Err(_) => None, + } +} + +/// Check if the pipeline_name exists +fn check_pipeline_name_exists(pipeline_name: Option) -> Result { + pipeline_name.context(InvalidParameterSnafu { + reason: "pipeline_name is required", + }) +} + +/// Check if the data length less than 10 +fn check_data_valid(data_len: usize) -> Result<()> { + ensure!( + data_len <= 10, + InvalidParameterSnafu { + reason: "data is required", + } + ); + Ok(()) +} + +#[axum_macros::debug_handler] +pub async fn pipeline_dryrun( + State(log_state): State, + Query(query_params): Query, + Extension(mut query_ctx): Extension, + TypedHeader(content_type): TypedHeader, + payload: String, +) -> Result { + let handler = log_state.log_handler; + + match check_pipeline_dryrun_params_valid(&payload) { + Some(params) => { + let data = params.data; + + check_data_valid(data.len())?; + + match params.pipeline { + None => { + let version = + to_pipeline_version(params.pipeline_version).context(PipelineSnafu)?; + let pipeline_name = check_pipeline_name_exists(params.pipeline_name)?; + let pipeline = handler + .get_pipeline(&pipeline_name, version, Arc::new(query_ctx)) + .await?; + dryrun_pipeline_inner(data, &pipeline) + } + Some(pipeline) => { + let pipeline = handler.build_pipeline(&pipeline)?; + dryrun_pipeline_inner(data, 
&pipeline) + } + } + } + None => { + // This path is for back compatibility with the previous dry run code + // where the payload is just data (JSON or plain text) and the pipeline name + // is specified using query param. + let pipeline_name = check_pipeline_name_exists(query_params.pipeline_name)?; + + let version = to_pipeline_version(query_params.version).context(PipelineSnafu)?; + + let ignore_errors = query_params.ignore_errors.unwrap_or(false); + + let value = + extract_pipeline_value_by_content_type(content_type, payload, ignore_errors)?; + + check_data_valid(value.len())?; + + query_ctx.set_channel(Channel::Http); + let query_ctx = Arc::new(query_ctx); + + let pipeline = handler + .get_pipeline(&pipeline_name, version, query_ctx.clone()) + .await?; + + dryrun_pipeline_inner(value, &pipeline) + } + } +} + #[axum_macros::debug_handler] pub async fn loki_ingest( State(log_state): State, diff --git a/src/servers/src/query_handler.rs b/src/servers/src/query_handler.rs index 96a01593a8f1..ff92d3c5d15b 100644 --- a/src/servers/src/query_handler.rs +++ b/src/servers/src/query_handler.rs @@ -170,4 +170,7 @@ pub trait PipelineHandler { table: &str, query_ctx: &QueryContext, ) -> std::result::Result>, catalog::error::Error>; + + //// Build a pipeline from a string. + fn build_pipeline(&self, pipeline: &str) -> Result>; } diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index 5a48fef39e43..ab2ec4ea6777 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -1319,7 +1319,7 @@ pub async fn test_test_pipeline_api(store_type: StorageType) { // handshake let client = TestClient::new(app); - let body = r#" + let pipeline_content = r#" processors: - date: field: time @@ -1346,7 +1346,7 @@ transform: let res = client .post("/v1/events/pipelines/test") .header("Content-Type", "application/x-yaml") - .body(body) + .body(pipeline_content) .send() .await; @@ -1367,113 +1367,192 @@ transform: let pipeline = pipelines.first().unwrap(); assert_eq!(pipeline.get("name").unwrap(), "test"); - // 2. 
write data - let data_body = r#" + let dryrun_schema = json!([ + { + "colume_type": "FIELD", + "data_type": "INT32", + "fulltext": false, + "name": "id1" + }, + { + "colume_type": "FIELD", + "data_type": "INT32", + "fulltext": false, + "name": "id2" + }, + { + "colume_type": "FIELD", + "data_type": "STRING", + "fulltext": false, + "name": "type" + }, + { + "colume_type": "FIELD", + "data_type": "STRING", + "fulltext": false, + "name": "log" + }, + { + "colume_type": "FIELD", + "data_type": "STRING", + "fulltext": false, + "name": "logger" + }, + { + "colume_type": "TIMESTAMP", + "data_type": "TIMESTAMP_NANOSECOND", + "fulltext": false, + "name": "time" + } + ]); + let dryrun_rows = json!([ [ - { - "id1": "2436", - "id2": "2528", - "logger": "INTERACT.MANAGER", - "type": "I", - "time": "2024-05-25 20:16:37.217", - "log": "ClusterAdapter:enter sendTextDataToCluster\\n" - } - ] - "#; - let res = client - .post("/v1/events/pipelines/dryrun?pipeline_name=test") - .header("Content-Type", "application/json") - .body(data_body) - .send() - .await; - assert_eq!(res.status(), StatusCode::OK); - let body: Value = res.json().await; - let schema = &body["schema"]; - let rows = &body["rows"]; - assert_eq!( - schema, - &json!([ { - "colume_type": "FIELD", "data_type": "INT32", - "fulltext": false, - "name": "id1" + "key": "id1", + "semantic_type": "FIELD", + "value": 2436 }, { - "colume_type": "FIELD", "data_type": "INT32", - "fulltext": false, - "name": "id2" + "key": "id2", + "semantic_type": "FIELD", + "value": 2528 }, { - "colume_type": "FIELD", "data_type": "STRING", - "fulltext": false, - "name": "type" + "key": "type", + "semantic_type": "FIELD", + "value": "I" }, { - "colume_type": "FIELD", "data_type": "STRING", - "fulltext": false, - "name": "log" + "key": "log", + "semantic_type": "FIELD", + "value": "ClusterAdapter:enter sendTextDataToCluster\\n" }, { - "colume_type": "FIELD", "data_type": "STRING", - "fulltext": false, - "name": "logger" + "key": "logger", + "semantic_type": "FIELD", + "value": "INTERACT.MANAGER" }, { - "colume_type": "TIMESTAMP", "data_type": "TIMESTAMP_NANOSECOND", - "fulltext": false, - "name": "time" + "key": "time", + "semantic_type": "TIMESTAMP", + "value": "2024-05-25 20:16:37.217+0000" } - ]) - ); - assert_eq!( - rows, - &json!([ - [ - { - "data_type": "INT32", - "key": "id1", - "semantic_type": "FIELD", - "value": 2436 - }, - { - "data_type": "INT32", - "key": "id2", - "semantic_type": "FIELD", - "value": 2528 - }, - { - "data_type": "STRING", - "key": "type", - "semantic_type": "FIELD", - "value": "I" - }, - { - "data_type": "STRING", - "key": "log", - "semantic_type": "FIELD", - "value": "ClusterAdapter:enter sendTextDataToCluster\\n" - }, - { - "data_type": "STRING", - "key": "logger", - "semantic_type": "FIELD", - "value": "INTERACT.MANAGER" - }, + ] + ]); + { + // test original api + let data_body = r#" + [ + { + "id1": "2436", + "id2": "2528", + "logger": "INTERACT.MANAGER", + "type": "I", + "time": "2024-05-25 20:16:37.217", + "log": "ClusterAdapter:enter sendTextDataToCluster\\n" + } + ] + "#; + let res = client + .post("/v1/events/pipelines/dryrun?pipeline_name=test") + .header("Content-Type", "application/json") + .body(data_body) + .send() + .await; + assert_eq!(res.status(), StatusCode::OK); + let body: Value = res.json().await; + let schema = &body["schema"]; + let rows = &body["rows"]; + assert_eq!(schema, &dryrun_schema); + assert_eq!(rows, &dryrun_rows); + } + { + // test new api specify pipeline via pipeline_name + let body = r#" + { + 
"pipeline_name": "test", + "data": [ { - "data_type": "TIMESTAMP_NANOSECOND", - "key": "time", - "semantic_type": "TIMESTAMP", - "value": "2024-05-25 20:16:37.217+0000" + "id1": "2436", + "id2": "2528", + "logger": "INTERACT.MANAGER", + "type": "I", + "time": "2024-05-25 20:16:37.217", + "log": "ClusterAdapter:enter sendTextDataToCluster\\n" } ] - ]) - ); + } + "#; + let res = client + .post("/v1/events/pipelines/dryrun") + .header("Content-Type", "application/json") + .body(body) + .send() + .await; + assert_eq!(res.status(), StatusCode::OK); + let body: Value = res.json().await; + let schema = &body["schema"]; + let rows = &body["rows"]; + assert_eq!(schema, &dryrun_schema); + assert_eq!(rows, &dryrun_rows); + } + { + // test new api specify pipeline via pipeline raw data + let mut body = json!({ + "data": [ + { + "id1": "2436", + "id2": "2528", + "logger": "INTERACT.MANAGER", + "type": "I", + "time": "2024-05-25 20:16:37.217", + "log": "ClusterAdapter:enter sendTextDataToCluster\\n" + } + ] + }); + body["pipeline"] = json!(pipeline_content); + let res = client + .post("/v1/events/pipelines/dryrun") + .header("Content-Type", "application/json") + .body(body.to_string()) + .send() + .await; + assert_eq!(res.status(), StatusCode::OK); + let body: Value = res.json().await; + let schema = &body["schema"]; + let rows = &body["rows"]; + assert_eq!(schema, &dryrun_schema); + assert_eq!(rows, &dryrun_rows); + } + { + // failback to old version api + // not pipeline and pipeline_name in the body + let body = json!({ + "data": [ + { + "id1": "2436", + "id2": "2528", + "logger": "INTERACT.MANAGER", + "type": "I", + "time": "2024-05-25 20:16:37.217", + "log": "ClusterAdapter:enter sendTextDataToCluster\\n" + } + ] + }); + let res = client + .post("/v1/events/pipelines/dryrun") + .header("Content-Type", "application/json") + .body(body.to_string()) + .send() + .await; + assert_eq!(res.status(), StatusCode::BAD_REQUEST); + } guard.remove_all().await; } From 4b4c6dbb66497e48e9573509dd1d9ab76e57097e Mon Sep 17 00:00:00 2001 From: Yohan Wal Date: Fri, 13 Dec 2024 15:34:24 +0800 Subject: [PATCH 20/59] refactor: cache inverted index with fixed-size page (#5114) * feat: cache inverted index by page instead of file * fix: add unit test and fix bugs * chore: typo * chore: ci * fix: math * chore: apply review comments * chore: renames * test: add unit test for index key calculation * refactor: use ReadableSize * feat: add config for inverted index page size * chore: update config file * refactor: handle multiple range read and fix some related bugs * fix: add config * test: turn to a fs reader to match behaviors of object store --- Cargo.lock | 1 + config/config.md | 4 + config/datanode.example.toml | 9 + config/standalone.example.toml | 3 + src/common/base/src/range_read.rs | 4 +- src/index/src/inverted_index/format/reader.rs | 13 +- .../src/inverted_index/format/reader/blob.rs | 17 +- src/mito2/Cargo.toml | 1 + src/mito2/src/cache.rs | 14 +- src/mito2/src/cache/index.rs | 399 ++++++++++++++++-- src/mito2/src/config.rs | 3 + src/mito2/src/error.rs | 11 +- src/mito2/src/sst/index.rs | 2 +- .../src/sst/index/inverted_index/applier.rs | 13 +- .../src/sst/index/inverted_index/creator.rs | 2 +- src/mito2/src/test_util.rs | 5 +- src/mito2/src/worker.rs | 1 + tests-integration/tests/http.rs | 1 + 18 files changed, 434 insertions(+), 69 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 534b8c465ae6..b60615c8e54c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6643,6 +6643,7 @@ dependencies = [ "async-channel 
1.9.0", "async-stream", "async-trait", + "bytemuck", "bytes", "common-base", "common-config", diff --git a/config/config.md b/config/config.md index 6a500a5b4a34..d3353930b163 100644 --- a/config/config.md +++ b/config/config.md @@ -150,6 +150,7 @@ | `region_engine.mito.inverted_index.intermediate_path` | String | `""` | Deprecated, use `region_engine.mito.index.aux_path` instead. | | `region_engine.mito.inverted_index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. | | `region_engine.mito.inverted_index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. | +| `region_engine.mito.inverted_index.content_cache_page_size` | String | `8MiB` | Page size for inverted index content cache. | | `region_engine.mito.fulltext_index` | -- | -- | The options for full-text index in Mito engine. | | `region_engine.mito.fulltext_index.create_on_flush` | String | `auto` | Whether to create the index on flush.
- `auto`: automatically (default)
- `disable`: never | | `region_engine.mito.fulltext_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.
- `auto`: automatically (default)
- `disable`: never | @@ -475,6 +476,9 @@ | `region_engine.mito.inverted_index.apply_on_query` | String | `auto` | Whether to apply the index on query
- `auto`: automatically (default)
- `disable`: never | | `region_engine.mito.inverted_index.mem_threshold_on_create` | String | `auto` | Memory threshold for performing an external sort during index creation.
- `auto`: automatically determine the threshold based on the system memory size (default)
- `unlimited`: no memory limit
- `[size]` e.g. `64MB`: fixed memory threshold | | `region_engine.mito.inverted_index.intermediate_path` | String | `""` | Deprecated, use `region_engine.mito.index.aux_path` instead. | +| `region_engine.mito.inverted_index.metadata_cache_size` | String | `64MiB` | Cache size for inverted index metadata. | +| `region_engine.mito.inverted_index.content_cache_size` | String | `128MiB` | Cache size for inverted index content. | +| `region_engine.mito.inverted_index.content_cache_page_size` | String | `8MiB` | Page size for inverted index content cache. | | `region_engine.mito.fulltext_index` | -- | -- | The options for full-text index in Mito engine. | | `region_engine.mito.fulltext_index.create_on_flush` | String | `auto` | Whether to create the index on flush.
- `auto`: automatically (default)
- `disable`: never | | `region_engine.mito.fulltext_index.create_on_compaction` | String | `auto` | Whether to create the index on compaction.
- `auto`: automatically (default)
- `disable`: never | diff --git a/config/datanode.example.toml b/config/datanode.example.toml index 0ba80a9f7d92..90a4d69b2e89 100644 --- a/config/datanode.example.toml +++ b/config/datanode.example.toml @@ -543,6 +543,15 @@ mem_threshold_on_create = "auto" ## Deprecated, use `region_engine.mito.index.aux_path` instead. intermediate_path = "" +## Cache size for inverted index metadata. +metadata_cache_size = "64MiB" + +## Cache size for inverted index content. +content_cache_size = "128MiB" + +## Page size for inverted index content cache. +content_cache_page_size = "8MiB" + ## The options for full-text index in Mito engine. [region_engine.mito.fulltext_index] diff --git a/config/standalone.example.toml b/config/standalone.example.toml index 8eae532d6166..b73246d37f0a 100644 --- a/config/standalone.example.toml +++ b/config/standalone.example.toml @@ -588,6 +588,9 @@ metadata_cache_size = "64MiB" ## Cache size for inverted index content. content_cache_size = "128MiB" +## Page size for inverted index content cache. +content_cache_page_size = "8MiB" + ## The options for full-text index in Mito engine. [region_engine.mito.fulltext_index] diff --git a/src/common/base/src/range_read.rs b/src/common/base/src/range_read.rs index 61f28cb629fd..53c26eeebdee 100644 --- a/src/common/base/src/range_read.rs +++ b/src/common/base/src/range_read.rs @@ -205,9 +205,7 @@ impl RangeReader for Vec { }) } - async fn read(&mut self, mut range: Range) -> io::Result { - range.end = range.end.min(self.len() as u64); - + async fn read(&mut self, range: Range) -> io::Result { let bytes = Bytes::copy_from_slice(&self[range.start as usize..range.end as usize]); Ok(bytes) } diff --git a/src/index/src/inverted_index/format/reader.rs b/src/index/src/inverted_index/format/reader.rs index a6fb0cecbfcd..904681d5f40a 100644 --- a/src/index/src/inverted_index/format/reader.rs +++ b/src/index/src/inverted_index/format/reader.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::ops::Range; use std::sync::Arc; use async_trait::async_trait; @@ -30,23 +31,23 @@ mod footer; #[mockall::automock] #[async_trait] pub trait InvertedIndexReader: Send { - /// Reads all data to dest. - async fn read_all(&mut self, dest: &mut Vec) -> Result; - /// Seeks to given offset and reads data with exact size as provided. - async fn seek_read(&mut self, offset: u64, size: u32) -> Result>; + async fn range_read(&mut self, offset: u64, size: u32) -> Result>; + + /// Reads the bytes in the given ranges. + async fn read_vec(&mut self, ranges: &[Range]) -> Result>>; /// Retrieves metadata of all inverted indices stored within the blob. async fn metadata(&mut self) -> Result>; /// Retrieves the finite state transducer (FST) map from the given offset and size. async fn fst(&mut self, offset: u64, size: u32) -> Result { - let fst_data = self.seek_read(offset, size).await?; + let fst_data = self.range_read(offset, size).await?; FstMap::new(fst_data).context(DecodeFstSnafu) } /// Retrieves the bitmap from the given offset and size. 
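// Illustrative sketch (editorial, not part of this patch): `read_vec` is what lets the
// page cache introduced later in this series fetch every missing page in one call. The
// reader and ranges are hypothetical, and the blob is assumed to be at least 8192 bytes.
let mut reader = InvertedIndexBlobReader::new(range_reader);
let pages = reader.read_vec(&[0..4096, 4096..8192]).await?;
assert_eq!(pages.len(), 2); // one buffer per requested range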
async fn bitmap(&mut self, offset: u64, size: u32) -> Result { - self.seek_read(offset, size).await.map(BitVec::from_vec) + self.range_read(offset, size).await.map(BitVec::from_vec) } } diff --git a/src/index/src/inverted_index/format/reader/blob.rs b/src/index/src/inverted_index/format/reader/blob.rs index de34cd36f849..371655d535f3 100644 --- a/src/index/src/inverted_index/format/reader/blob.rs +++ b/src/index/src/inverted_index/format/reader/blob.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::ops::Range; use std::sync::Arc; use async_trait::async_trait; @@ -50,16 +51,7 @@ impl InvertedIndexBlobReader { #[async_trait] impl InvertedIndexReader for InvertedIndexBlobReader { - async fn read_all(&mut self, dest: &mut Vec) -> Result { - let metadata = self.source.metadata().await.context(CommonIoSnafu)?; - self.source - .read_into(0..metadata.content_length, dest) - .await - .context(CommonIoSnafu)?; - Ok(metadata.content_length as usize) - } - - async fn seek_read(&mut self, offset: u64, size: u32) -> Result> { + async fn range_read(&mut self, offset: u64, size: u32) -> Result> { let buf = self .source .read(offset..offset + size as u64) @@ -68,6 +60,11 @@ impl InvertedIndexReader for InvertedIndexBlobReader { Ok(buf.into()) } + async fn read_vec(&mut self, ranges: &[Range]) -> Result>> { + let bufs = self.source.read_vec(ranges).await.context(CommonIoSnafu)?; + Ok(bufs.into_iter().map(|buf| buf.into()).collect()) + } + async fn metadata(&mut self) -> Result> { let metadata = self.source.metadata().await.context(CommonIoSnafu)?; let blob_size = metadata.content_length; diff --git a/src/mito2/Cargo.toml b/src/mito2/Cargo.toml index eedf6ae636d8..eecb79440a2e 100644 --- a/src/mito2/Cargo.toml +++ b/src/mito2/Cargo.toml @@ -17,6 +17,7 @@ aquamarine.workspace = true async-channel = "1.9" async-stream.workspace = true async-trait = "0.1" +bytemuck.workspace = true bytes.workspace = true common-base.workspace = true common-config.workspace = true diff --git a/src/mito2/src/cache.rs b/src/mito2/src/cache.rs index 7018b039d62e..03cf9136245a 100644 --- a/src/mito2/src/cache.rs +++ b/src/mito2/src/cache.rs @@ -244,6 +244,7 @@ pub struct CacheManagerBuilder { page_cache_size: u64, index_metadata_size: u64, index_content_size: u64, + index_content_page_size: u64, puffin_metadata_size: u64, write_cache: Option, selector_result_cache_size: u64, @@ -286,6 +287,12 @@ impl CacheManagerBuilder { self } + /// Sets page size for index content. + pub fn index_content_page_size(mut self, bytes: u64) -> Self { + self.index_content_page_size = bytes; + self + } + /// Sets cache size for puffin metadata. 
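// Illustrative sketch (editorial, not part of this patch): the new page-size knob slots
// into the existing builder chain exactly as worker.rs wires it up further down. The
// sizes are arbitrary, and constructing the builder via `Default` is an assumption here.
let cache_manager = CacheManagerBuilder::default()
    .index_metadata_size(ReadableSize::mb(64).as_bytes())
    .index_content_size(ReadableSize::mb(128).as_bytes())
    .index_content_page_size(ReadableSize::mb(8).as_bytes()) // new in this patch
    .build();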
pub fn puffin_metadata_size(mut self, bytes: u64) -> Self { self.puffin_metadata_size = bytes; @@ -352,8 +359,11 @@ impl CacheManagerBuilder { }) .build() }); - let inverted_index_cache = - InvertedIndexCache::new(self.index_metadata_size, self.index_content_size); + let inverted_index_cache = InvertedIndexCache::new( + self.index_metadata_size, + self.index_content_size, + self.index_content_page_size, + ); let puffin_metadata_cache = PuffinMetadataCache::new(self.puffin_metadata_size, &CACHE_BYTES); let selector_result_cache = (self.selector_result_cache_size != 0).then(|| { diff --git a/src/mito2/src/cache/index.rs b/src/mito2/src/cache/index.rs index 4e6e4deee260..e25fb22dcbf5 100644 --- a/src/mito2/src/cache/index.rs +++ b/src/mito2/src/cache/index.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::ops::Range; use std::sync::Arc; use api::v1::index::InvertedIndexMetas; @@ -34,14 +35,16 @@ const INDEX_CONTENT_TYPE: &str = "index_content"; /// Inverted index blob reader with cache. pub struct CachedInvertedIndexBlobReader { file_id: FileId, + file_size: u64, inner: R, cache: InvertedIndexCacheRef, } impl CachedInvertedIndexBlobReader { - pub fn new(file_id: FileId, inner: R, cache: InvertedIndexCacheRef) -> Self { + pub fn new(file_id: FileId, file_size: u64, inner: R, cache: InvertedIndexCacheRef) -> Self { Self { file_id, + file_size, inner, cache, } @@ -59,43 +62,77 @@ where offset: u64, size: u32, ) -> index::inverted_index::error::Result> { - let range = offset as usize..(offset + size as u64) as usize; - if let Some(cached) = self.cache.get_index(IndexKey { - file_id: self.file_id, - }) { - CACHE_HIT.with_label_values(&[INDEX_CONTENT_TYPE]).inc(); - Ok(cached[range].to_vec()) - } else { - let mut all_data = Vec::with_capacity(1024 * 1024); - self.inner.read_all(&mut all_data).await?; - let result = all_data[range].to_vec(); - self.cache.put_index( - IndexKey { - file_id: self.file_id, - }, - Arc::new(all_data), - ); - CACHE_MISS.with_label_values(&[INDEX_CONTENT_TYPE]).inc(); - Ok(result) + let keys = + IndexDataPageKey::generate_page_keys(self.file_id, offset, size, self.cache.page_size); + // Size is 0, return empty data. + if keys.is_empty() { + return Ok(Vec::new()); + } + // TODO: Can be replaced by an uncontinuous structure like opendal::Buffer. + let mut data = Vec::with_capacity(keys.len()); + data.resize(keys.len(), Arc::new(Vec::new())); + let mut cache_miss_range = vec![]; + let mut cache_miss_idx = vec![]; + let last_index = keys.len() - 1; + // TODO: Avoid copy as much as possible. 
+ for (i, index) in keys.clone().into_iter().enumerate() { + match self.cache.get_index(&index) { + Some(page) => { + CACHE_HIT.with_label_values(&[INDEX_CONTENT_TYPE]).inc(); + data[i] = page; + } + None => { + CACHE_MISS.with_label_values(&[INDEX_CONTENT_TYPE]).inc(); + let base_offset = index.page_id * self.cache.page_size; + let pruned_size = if i == last_index { + prune_size(&keys, self.file_size, self.cache.page_size) + } else { + self.cache.page_size + }; + cache_miss_range.push(base_offset..base_offset + pruned_size); + cache_miss_idx.push(i); + } + } } + if !cache_miss_range.is_empty() { + let pages = self.inner.read_vec(&cache_miss_range).await?; + for (i, page) in cache_miss_idx.into_iter().zip(pages.into_iter()) { + let page = Arc::new(page); + let key = keys[i].clone(); + data[i] = page.clone(); + self.cache.put_index(key, page.clone()); + } + } + let mut result = Vec::with_capacity(size as usize); + data.iter().enumerate().for_each(|(i, page)| { + let range = if i == 0 { + IndexDataPageKey::calculate_first_page_range(offset, size, self.cache.page_size) + } else if i == last_index { + IndexDataPageKey::calculate_last_page_range(offset, size, self.cache.page_size) + } else { + 0..self.cache.page_size as usize + }; + result.extend_from_slice(&page[range]); + }); + Ok(result) } } #[async_trait] impl InvertedIndexReader for CachedInvertedIndexBlobReader { - async fn read_all( - &mut self, - dest: &mut Vec, - ) -> index::inverted_index::error::Result { - self.inner.read_all(dest).await - } - - async fn seek_read( + async fn range_read( &mut self, offset: u64, size: u32, ) -> index::inverted_index::error::Result> { - self.inner.seek_read(offset, size).await + self.inner.range_read(offset, size).await + } + + async fn read_vec( + &mut self, + ranges: &[Range], + ) -> index::inverted_index::error::Result>> { + self.inner.read_vec(ranges).await } async fn metadata(&mut self) -> index::inverted_index::error::Result> { @@ -130,22 +167,81 @@ impl InvertedIndexReader for CachedInvertedIndexBlobRead } #[derive(Debug, Clone, PartialEq, Eq, Hash)] -pub struct IndexKey { +pub struct IndexMetadataKey { + file_id: FileId, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct IndexDataPageKey { file_id: FileId, + page_id: u64, +} + +impl IndexDataPageKey { + /// Converts an offset to a page ID based on the page size. + fn calculate_page_id(offset: u64, page_size: u64) -> u64 { + offset / page_size + } + + /// Calculates the total number of pages that a given size spans, starting from a specific offset. + fn calculate_page_count(offset: u64, size: u32, page_size: u64) -> u32 { + let start_page = Self::calculate_page_id(offset, page_size); + let end_page = Self::calculate_page_id(offset + (size as u64) - 1, page_size); + (end_page + 1 - start_page) as u32 + } + + /// Computes the byte range in the first page based on the offset and size. + /// For example, if offset is 1000 and size is 5000 with PAGE_SIZE of 4096, the first page range is 1000..4096. + fn calculate_first_page_range(offset: u64, size: u32, page_size: u64) -> Range { + let start = (offset % page_size) as usize; + let end = if size > page_size as u32 - start as u32 { + page_size as usize + } else { + start + size as usize + }; + start..end + } + + /// Computes the byte range in the last page based on the offset and size. + /// For example, if offset is 1000 and size is 5000 with PAGE_SIZE of 4096, the last page range is 0..1904. 
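// Illustrative sketch (editorial, not part of this patch): how the page-range helpers and
// the key generator in this impl compose for one read (`file_id` is any FileId in scope).
// offset = 1000, size = 5000, page_size = 4096:
//   pages spanned: ids 0 and 1; the first page contributes bytes 1000..4096 (3096 bytes),
//   the last page contributes bytes 0..1904 (1904 bytes); 3096 + 1904 = 5000.
let keys = IndexDataPageKey::generate_page_keys(file_id, 1000, 5000, 4096);
assert_eq!(keys.len(), 2);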
+ fn calculate_last_page_range(offset: u64, size: u32, page_size: u64) -> Range { + let offset = offset as usize; + let size = size as usize; + let page_size = page_size as usize; + if (offset + size) % page_size == 0 { + 0..page_size + } else { + 0..((offset + size) % page_size) + } + } + + /// Generates a vector of IndexKey instances for the pages that a given offset and size span. + fn generate_page_keys(file_id: FileId, offset: u64, size: u32, page_size: u64) -> Vec { + let start_page = Self::calculate_page_id(offset, page_size); + let total_pages = Self::calculate_page_count(offset, size, page_size); + (0..total_pages) + .map(|i| Self { + file_id, + page_id: start_page + i as u64, + }) + .collect() + } } pub type InvertedIndexCacheRef = Arc; pub struct InvertedIndexCache { /// Cache for inverted index metadata - index_metadata: moka::sync::Cache>, + index_metadata: moka::sync::Cache>, /// Cache for inverted index content. - index: moka::sync::Cache>>, + index: moka::sync::Cache>>, + // Page size for index content. + page_size: u64, } impl InvertedIndexCache { /// Creates `InvertedIndexCache` with provided `index_metadata_cap` and `index_content_cap`. - pub fn new(index_metadata_cap: u64, index_content_cap: u64) -> Self { + pub fn new(index_metadata_cap: u64, index_content_cap: u64, page_size: u64) -> Self { common_telemetry::debug!("Building InvertedIndexCache with metadata size: {index_metadata_cap}, content size: {index_content_cap}"); let index_metadata = moka::sync::CacheBuilder::new(index_metadata_cap) .name("inverted_index_metadata") @@ -170,29 +266,29 @@ impl InvertedIndexCache { Self { index_metadata, index: index_cache, + page_size, } } } impl InvertedIndexCache { pub fn get_index_metadata(&self, file_id: FileId) -> Option> { - self.index_metadata.get(&IndexKey { file_id }) + self.index_metadata.get(&IndexMetadataKey { file_id }) } pub fn put_index_metadata(&self, file_id: FileId, metadata: Arc) { - let key = IndexKey { file_id }; + let key = IndexMetadataKey { file_id }; CACHE_BYTES .with_label_values(&[INDEX_METADATA_TYPE]) .add(index_metadata_weight(&key, &metadata).into()); self.index_metadata.insert(key, metadata) } - // todo(hl): align index file content to pages with size like 4096 bytes. - pub fn get_index(&self, key: IndexKey) -> Option>> { - self.index.get(&key) + pub fn get_index(&self, key: &IndexDataPageKey) -> Option>> { + self.index.get(key) } - pub fn put_index(&self, key: IndexKey, value: Arc>) { + pub fn put_index(&self, key: IndexDataPageKey, value: Arc>) { CACHE_BYTES .with_label_values(&[INDEX_CONTENT_TYPE]) .add(index_content_weight(&key, &value).into()); @@ -201,11 +297,234 @@ impl InvertedIndexCache { } /// Calculates weight for index metadata. -fn index_metadata_weight(k: &IndexKey, v: &Arc) -> u32 { +fn index_metadata_weight(k: &IndexMetadataKey, v: &Arc) -> u32 { (k.file_id.as_bytes().len() + v.encoded_len()) as u32 } /// Calculates weight for index content. -fn index_content_weight(k: &IndexKey, v: &Arc>) -> u32 { +fn index_content_weight(k: &IndexDataPageKey, v: &Arc>) -> u32 { (k.file_id.as_bytes().len() + v.len()) as u32 } + +/// Prunes the size of the last page based on the indexes. +/// We have following cases: +/// 1. The rest file size is less than the page size, read to the end of the file. +/// 2. Otherwise, read the page size. 
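// Illustrative sketch (editorial, not part of this patch): a quick numeric check of the
// truncation case handled by `prune_size` just below (`file_id` is any FileId in scope).
// file_size = 10_000, page_size = 4096, single key with page_id = 2 (offset 8192):
let keys = IndexDataPageKey::generate_page_keys(file_id, 8192, 1808, 4096);
assert_eq!(prune_size(&keys, 10_000, 4096), 1808); // read truncated to the file tail
// With file_size = 16_384 the same call would return a full 4096-byte page.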
+fn prune_size(indexes: &[IndexDataPageKey], file_size: u64, page_size: u64) -> u64 { + let last_page_start = indexes.last().map(|i| i.page_id * page_size).unwrap_or(0); + page_size.min(file_size - last_page_start) +} + +#[cfg(test)] +mod test { + use std::num::NonZeroUsize; + + use common_base::BitVec; + use futures::stream; + use index::inverted_index::format::reader::{InvertedIndexBlobReader, InvertedIndexReader}; + use index::inverted_index::format::writer::{InvertedIndexBlobWriter, InvertedIndexWriter}; + use index::inverted_index::Bytes; + use prometheus::register_int_counter_vec; + use rand::{Rng, RngCore}; + + use super::*; + use crate::sst::index::store::InstrumentedStore; + use crate::test_util::TestEnv; + + // Fuzz test for index data page key + #[test] + fn fuzz_index_calculation() { + // randomly generate a large u8 array + let mut rng = rand::thread_rng(); + let mut data = vec![0u8; 1024 * 1024]; + rng.fill_bytes(&mut data); + let file_id = FileId::random(); + + for _ in 0..100 { + let offset = rng.gen_range(0..data.len() as u64); + let size = rng.gen_range(0..data.len() as u32 - offset as u32); + let page_size: usize = rng.gen_range(1..1024); + + let indexes = + IndexDataPageKey::generate_page_keys(file_id, offset, size, page_size as u64); + let page_num = indexes.len(); + let mut read = Vec::with_capacity(size as usize); + let last_index = indexes.len() - 1; + for (i, key) in indexes.into_iter().enumerate() { + let start = key.page_id as usize * page_size; + let page = if start + page_size < data.len() { + &data[start..start + page_size] + } else { + &data[start..] + }; + let range = if i == 0 { + // first page range + IndexDataPageKey::calculate_first_page_range(offset, size, page_size as u64) + } else if i == last_index { + // last page range. when the first page is the last page, the range is not used. 
+ IndexDataPageKey::calculate_last_page_range(offset, size, page_size as u64) + } else { + 0..page_size + }; + read.extend_from_slice(&page[range]); + } + let expected_range = offset as usize..(offset + size as u64 as u64) as usize; + if read != data.get(expected_range).unwrap() { + panic!( + "fuzz_read_index failed, offset: {}, size: {}, page_size: {}\nread len: {}, expected len: {}\nfirst page range: {:?}, last page range: {:?}, page num: {}", + offset, size, page_size, read.len(), size as usize, + IndexDataPageKey::calculate_first_page_range(offset, size, page_size as u64), + IndexDataPageKey::calculate_last_page_range(offset, size, page_size as u64), page_num + ); + } + } + } + + fn unpack(fst_value: u64) -> [u32; 2] { + bytemuck::cast::(fst_value) + } + + async fn create_inverted_index_blob() -> Vec { + let mut blob = Vec::new(); + let mut writer = InvertedIndexBlobWriter::new(&mut blob); + writer + .add_index( + "tag0".to_string(), + BitVec::from_slice(&[0b0000_0001, 0b0000_0000]), + Box::new(stream::iter(vec![ + Ok((Bytes::from("a"), BitVec::from_slice(&[0b0000_0001]))), + Ok((Bytes::from("b"), BitVec::from_slice(&[0b0010_0000]))), + Ok((Bytes::from("c"), BitVec::from_slice(&[0b0000_0001]))), + ])), + ) + .await + .unwrap(); + writer + .add_index( + "tag1".to_string(), + BitVec::from_slice(&[0b0000_0001, 0b0000_0000]), + Box::new(stream::iter(vec![ + Ok((Bytes::from("x"), BitVec::from_slice(&[0b0000_0001]))), + Ok((Bytes::from("y"), BitVec::from_slice(&[0b0010_0000]))), + Ok((Bytes::from("z"), BitVec::from_slice(&[0b0000_0001]))), + ])), + ) + .await + .unwrap(); + writer + .finish(8, NonZeroUsize::new(1).unwrap()) + .await + .unwrap(); + + blob + } + + #[tokio::test] + async fn test_inverted_index_cache() { + let blob = create_inverted_index_blob().await; + + // Init a test range reader in local fs. 
+ let mut env = TestEnv::new(); + let file_size = blob.len() as u64; + let store = env.init_object_store_manager(); + let temp_path = "data"; + store.write(temp_path, blob).await.unwrap(); + let store = InstrumentedStore::new(store); + let metric = + register_int_counter_vec!("test_bytes", "a counter for test", &["test"]).unwrap(); + let counter = metric.with_label_values(&["test"]); + let range_reader = store + .range_reader("data", &counter, &counter) + .await + .unwrap(); + + let reader = InvertedIndexBlobReader::new(range_reader); + let mut cached_reader = CachedInvertedIndexBlobReader::new( + FileId::random(), + file_size, + reader, + Arc::new(InvertedIndexCache::new(8192, 8192, 50)), + ); + let metadata = cached_reader.metadata().await.unwrap(); + assert_eq!(metadata.total_row_count, 8); + assert_eq!(metadata.segment_row_count, 1); + assert_eq!(metadata.metas.len(), 2); + // tag0 + let tag0 = metadata.metas.get("tag0").unwrap(); + let stats0 = tag0.stats.as_ref().unwrap(); + assert_eq!(stats0.distinct_count, 3); + assert_eq!(stats0.null_count, 1); + assert_eq!(stats0.min_value, Bytes::from("a")); + assert_eq!(stats0.max_value, Bytes::from("c")); + let fst0 = cached_reader + .fst( + tag0.base_offset + tag0.relative_fst_offset as u64, + tag0.fst_size, + ) + .await + .unwrap(); + assert_eq!(fst0.len(), 3); + let [offset, size] = unpack(fst0.get(b"a").unwrap()); + let bitmap = cached_reader + .bitmap(tag0.base_offset + offset as u64, size) + .await + .unwrap(); + assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001])); + let [offset, size] = unpack(fst0.get(b"b").unwrap()); + let bitmap = cached_reader + .bitmap(tag0.base_offset + offset as u64, size) + .await + .unwrap(); + assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000])); + let [offset, size] = unpack(fst0.get(b"c").unwrap()); + let bitmap = cached_reader + .bitmap(tag0.base_offset + offset as u64, size) + .await + .unwrap(); + assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001])); + + // tag1 + let tag1 = metadata.metas.get("tag1").unwrap(); + let stats1 = tag1.stats.as_ref().unwrap(); + assert_eq!(stats1.distinct_count, 3); + assert_eq!(stats1.null_count, 1); + assert_eq!(stats1.min_value, Bytes::from("x")); + assert_eq!(stats1.max_value, Bytes::from("z")); + let fst1 = cached_reader + .fst( + tag1.base_offset + tag1.relative_fst_offset as u64, + tag1.fst_size, + ) + .await + .unwrap(); + assert_eq!(fst1.len(), 3); + let [offset, size] = unpack(fst1.get(b"x").unwrap()); + let bitmap = cached_reader + .bitmap(tag1.base_offset + offset as u64, size) + .await + .unwrap(); + assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001])); + let [offset, size] = unpack(fst1.get(b"y").unwrap()); + let bitmap = cached_reader + .bitmap(tag1.base_offset + offset as u64, size) + .await + .unwrap(); + assert_eq!(bitmap, BitVec::from_slice(&[0b0010_0000])); + let [offset, size] = unpack(fst1.get(b"z").unwrap()); + let bitmap = cached_reader + .bitmap(tag1.base_offset + offset as u64, size) + .await + .unwrap(); + assert_eq!(bitmap, BitVec::from_slice(&[0b0000_0001])); + + // fuzz test + let mut rng = rand::thread_rng(); + for _ in 0..100 { + let offset = rng.gen_range(0..file_size); + let size = rng.gen_range(0..file_size as u32 - offset as u32); + let expected = cached_reader.range_read(offset, size).await.unwrap(); + let read = cached_reader.get_or_load(offset, size).await.unwrap(); + assert_eq!(read, expected); + } + } +} diff --git a/src/mito2/src/config.rs b/src/mito2/src/config.rs index dda3f4271059..963089c60aed 100644 --- 
a/src/mito2/src/config.rs +++ b/src/mito2/src/config.rs @@ -416,6 +416,8 @@ pub struct InvertedIndexConfig { pub metadata_cache_size: ReadableSize, /// Cache size for inverted index content. Setting it to 0 to disable the cache. pub content_cache_size: ReadableSize, + /// Page size for inverted index content. + pub content_cache_page_size: ReadableSize, } impl InvertedIndexConfig { @@ -441,6 +443,7 @@ impl Default for InvertedIndexConfig { intermediate_path: String::new(), metadata_cache_size: ReadableSize::mb(64), content_cache_size: ReadableSize::mb(128), + content_cache_page_size: ReadableSize::mb(8), }; if let Some(sys_memory) = common_config::utils::get_sys_total_memory() { diff --git a/src/mito2/src/error.rs b/src/mito2/src/error.rs index d5e47d213657..f6d1dbafeec9 100644 --- a/src/mito2/src/error.rs +++ b/src/mito2/src/error.rs @@ -893,6 +893,14 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Failed to read file metadata"))] + Metadata { + #[snafu(source)] + error: std::io::Error, + #[snafu(implicit)] + location: Location, + }, } pub type Result = std::result::Result; @@ -965,7 +973,8 @@ impl ErrorExt for Error { | CreateDir { .. } | ReadDataPart { .. } | CorruptedEntry { .. } - | BuildEntry { .. } => StatusCode::Internal, + | BuildEntry { .. } + | Metadata { .. } => StatusCode::Internal, OpenRegion { source, .. } => source.status_code(), diff --git a/src/mito2/src/sst/index.rs b/src/mito2/src/sst/index.rs index a4f4ab9e446b..1972f3d7abb6 100644 --- a/src/mito2/src/sst/index.rs +++ b/src/mito2/src/sst/index.rs @@ -18,7 +18,7 @@ pub(crate) mod intermediate; pub(crate) mod inverted_index; pub(crate) mod puffin_manager; mod statistics; -mod store; +pub(crate) mod store; use std::num::NonZeroUsize; diff --git a/src/mito2/src/sst/index/inverted_index/applier.rs b/src/mito2/src/sst/index/inverted_index/applier.rs index d060d4bec17b..0542fd7a59ea 100644 --- a/src/mito2/src/sst/index/inverted_index/applier.rs +++ b/src/mito2/src/sst/index/inverted_index/applier.rs @@ -16,6 +16,7 @@ pub mod builder; use std::sync::Arc; +use common_base::range_read::RangeReader; use common_telemetry::warn; use index::inverted_index::format::reader::InvertedIndexBlobReader; use index::inverted_index::search::index_apply::{ @@ -29,7 +30,9 @@ use store_api::storage::RegionId; use crate::cache::file_cache::{FileCacheRef, FileType, IndexKey}; use crate::cache::index::{CachedInvertedIndexBlobReader, InvertedIndexCacheRef}; -use crate::error::{ApplyInvertedIndexSnafu, PuffinBuildReaderSnafu, PuffinReadBlobSnafu, Result}; +use crate::error::{ + ApplyInvertedIndexSnafu, MetadataSnafu, PuffinBuildReaderSnafu, PuffinReadBlobSnafu, Result, +}; use crate::metrics::{INDEX_APPLY_ELAPSED, INDEX_APPLY_MEMORY_USAGE}; use crate::sst::file::FileId; use crate::sst::index::inverted_index::INDEX_BLOB_TYPE; @@ -123,7 +126,7 @@ impl InvertedIndexApplier { index_not_found_strategy: IndexNotFoundStrategy::ReturnEmpty, }; - let blob = match self.cached_blob_reader(file_id).await { + let mut blob = match self.cached_blob_reader(file_id).await { Ok(Some(puffin_reader)) => puffin_reader, other => { if let Err(err) = other { @@ -134,8 +137,14 @@ impl InvertedIndexApplier { }; if let Some(index_cache) = &self.inverted_index_cache { + let file_size = if let Some(file_size) = file_size_hint { + file_size + } else { + blob.metadata().await.context(MetadataSnafu)?.content_length + }; let mut index_reader = CachedInvertedIndexBlobReader::new( file_id, + file_size, InvertedIndexBlobReader::new(blob), 
                index_cache.clone(),
            );
diff --git a/src/mito2/src/sst/index/inverted_index/creator.rs b/src/mito2/src/sst/index/inverted_index/creator.rs
index 43cf54fa2811..15cba55c4437 100644
--- a/src/mito2/src/sst/index/inverted_index/creator.rs
+++ b/src/mito2/src/sst/index/inverted_index/creator.rs
@@ -448,7 +448,7 @@ mod tests {
             move |expr| {
                 let _d = &d;
 
-                let cache = Arc::new(InvertedIndexCache::new(10, 10));
+                let cache = Arc::new(InvertedIndexCache::new(10, 10, 100));
                 let puffin_metadata_cache = Arc::new(PuffinMetadataCache::new(10, &CACHE_BYTES));
                 let applier = InvertedIndexApplierBuilder::new(
                     region_dir.clone(),
diff --git a/src/mito2/src/test_util.rs b/src/mito2/src/test_util.rs
index dec175e76ff6..314e886ba9ca 100644
--- a/src/mito2/src/test_util.rs
+++ b/src/mito2/src/test_util.rs
@@ -35,8 +35,7 @@ use api::v1::{OpType, Row, Rows, SemanticType};
 use common_base::readable_size::ReadableSize;
 use common_base::Plugins;
 use common_datasource::compression::CompressionType;
-use common_meta::cache::{new_schema_cache, new_table_info_cache, new_table_schema_cache};
-use common_meta::key::schema_name::{SchemaName, SchemaNameValue};
+use common_meta::cache::{new_schema_cache, new_table_schema_cache};
 use common_meta::key::{SchemaMetadataManager, SchemaMetadataManagerRef};
 use common_meta::kv_backend::memory::MemoryKvBackend;
 use common_meta::kv_backend::KvBackendRef;
@@ -49,7 +48,7 @@ use datatypes::schema::ColumnSchema;
 use log_store::kafka::log_store::KafkaLogStore;
 use log_store::raft_engine::log_store::RaftEngineLogStore;
 use log_store::test_util::log_store_util;
-use moka::future::{Cache, CacheBuilder};
+use moka::future::CacheBuilder;
 use object_store::manager::{ObjectStoreManager, ObjectStoreManagerRef};
 use object_store::services::Fs;
 use object_store::ObjectStore;
diff --git a/src/mito2/src/worker.rs b/src/mito2/src/worker.rs
index f8ab9c3f4edb..233ab9f056b1 100644
--- a/src/mito2/src/worker.rs
+++ b/src/mito2/src/worker.rs
@@ -170,6 +170,7 @@ impl WorkerGroup {
                 .selector_result_cache_size(config.selector_result_cache_size.as_bytes())
                 .index_metadata_size(config.inverted_index.metadata_cache_size.as_bytes())
                 .index_content_size(config.inverted_index.content_cache_size.as_bytes())
+                .index_content_page_size(config.inverted_index.content_cache_page_size.as_bytes())
                 .puffin_metadata_size(config.index.metadata_cache_size.as_bytes())
                 .write_cache(write_cache)
                 .build(),
diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs
index ab2ec4ea6777..4843b81e9142 100644
--- a/tests-integration/tests/http.rs
+++ b/tests-integration/tests/http.rs
@@ -946,6 +946,7 @@ create_on_flush = "auto"
 create_on_compaction = "auto"
 apply_on_query = "auto"
 mem_threshold_on_create = "auto"
+content_cache_page_size = "8MiB"
 
 [region_engine.mito.fulltext_index]
 create_on_flush = "auto"

From bef6896280a4dd5833617df04378667bce13a634 Mon Sep 17 00:00:00 2001
From: Yingwen
Date: Fri, 13 Dec 2024 16:17:49 +0800
Subject: [PATCH 21/59] docs: Add index panels to standalone grafana dashboard (#5140)

* docs: Add index panels to standalone grafana dashboard

* docs: fix flush/compaction op
---
 grafana/greptimedb.json | 3101 ++++++++++++++++++++++++---------------
 1 file changed, 1930 insertions(+), 1171 deletions(-)

diff --git a/grafana/greptimedb.json b/grafana/greptimedb.json
index 7c6dfb075187..86925d53420f 100644
--- a/grafana/greptimedb.json
+++ b/grafana/greptimedb.json
@@ -145,7 +145,9 @@
         "countRows": false,
         "enablePagination": false,
         "fields": [],
-        "reducer": ["sum"],
+        "reducer": [
+          "sum"
+        ],
"show": false }, "showHeader": true, @@ -223,7 +225,9 @@ "justifyMode": "center", "orientation": "auto", "reduceOptions": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "fields": "", "values": false }, @@ -298,7 +302,9 @@ "justifyMode": "auto", "orientation": "auto", "reduceOptions": { - "calcs": ["lastNotNull"], + "calcs": [ + "lastNotNull" + ], "fields": "", "values": false }, @@ -595,7 +601,7 @@ "type": "timeseries" }, { - "collapsed": false, + "collapsed": true, "gridPos": { "h": 1, "w": 24, @@ -603,768 +609,764 @@ "y": 10 }, "id": 24, - "panels": [], - "title": "Protocol", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 11 + }, + "id": 34, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 11 - }, - "id": 34, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_promql_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "promql-{{db}}-p95", + "range": true, + "refId": "PromQL P95", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": 
"code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_promql_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "promql-{{db}}-p99", + "range": true, + "refId": "PromQL P99", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_sql_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "sql-{{db}}-p95", + "range": true, + "refId": "SQL P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_sql_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "sql-{{db}}-p99", + "range": true, + "refId": "SQL P99" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_prometheus_read_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "promstore-read-{{db}}-p95", + "range": true, + "refId": "PromStore Read P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_prometheus_read_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "promstore-read-{{db}}-p99", + "range": true, + "refId": "PromStore Read P99" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db, method) (rate(greptime_servers_http_prometheus_promql_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "prom-promql-{{db}}-{{method}}-p95", + "range": true, + "refId": "Prometheus PromQL P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db, method) (rate(greptime_servers_http_prometheus_promql_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "prom-promql-{{db}}-{{method}}-p99", + "range": true, + "refId": "Prometheus PromQL P99" + } + ], + "title": "HTTP query elapsed", + "type": "timeseries" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_promql_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "promql-{{db}}-p95", - "range": true, - "refId": "PromQL P95", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_promql_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "promql-{{db}}-p99", - "range": true, - "refId": "PromQL P99", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 11 }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_sql_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "sql-{{db}}-p95", - "range": true, - "refId": "SQL P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" + "id": 35, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_sql_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "sql-{{db}}-p99", - "range": true, - "refId": "SQL P99" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_influxdb_write_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "influx-{{db}}-p95", + "range": true, + "refId": "InfluxDB Line Protocol P95", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_influxdb_write_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "influx-{{db}}-p99", + "range": true, + "refId": "InfluxDB Line Protocol P99", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_prometheus_write_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "promstore-{{db}}-p95", + "range": true, + "refId": "PromStore Write P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_prometheus_write_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "promstore-{{db}}-p99", + "range": true, + "refId": "PromStore Write P99" + }, 
+ { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_otlp_metrics_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "otlp-metric-{{db}}-p95", + "range": true, + "refId": "OTLP Metric P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_otlp_metrics_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "otlp-metric-{{db}}-p99", + "range": true, + "refId": "OTLP Metric P99" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_otlp_traces_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "otlp-trace-{{db}}-p95", + "range": true, + "refId": "OTLP Trace P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_otlp_traces_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "otlp-trace-{{db}}-p99", + "range": true, + "refId": "OTLP Trace P99" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_logs_transform_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "log-transform-{{db}}-p95", + "range": true, + "refId": "Log Transform P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_logs_transform_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "log-transform-{{db}}-p99", + "range": true, + "refId": "Log Transform P99" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_logs_ingestion_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "log-ingest-{{db}}-p99", + "range": true, + "refId": "Log Ingest P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_logs_ingestion_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "log-ingest-{{db}}-p99", + "range": true, + "refId": "Log Ingest P99" + } + ], + "title": "HTTP write elapsed", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_prometheus_read_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "promstore-read-{{db}}-p95", - "range": true, - "refId": "PromStore Read P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_prometheus_read_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "promstore-read-{{db}}-p99", - "range": true, - "refId": "PromStore Read P99" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 18 }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db, method) (rate(greptime_servers_http_prometheus_promql_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "prom-promql-{{db}}-{{method}}-p95", - "range": true, - "refId": "Prometheus PromQL P95" + "id": 38, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(path) (rate(greptime_servers_http_requests_total[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "HTTP request rate", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db, method) (rate(greptime_servers_http_prometheus_promql_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "prom-promql-{{db}}-{{method}}-p99", - "range": true, - "refId": "Prometheus PromQL P99" - } - ], - "title": "HTTP query elapsed", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + 
"hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 18 + }, + "id": 36, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 11 - }, - "id": 35, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_influxdb_write_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "influx-{{db}}-p95", - "range": true, - "refId": "InfluxDB Line Protocol P95", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_influxdb_write_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "influx-{{db}}-p99", - "range": true, - "refId": "InfluxDB Line Protocol P99", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_prometheus_write_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "promstore-{{db}}-p95", - "range": true, - "refId": "PromStore Write P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_prometheus_write_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "promstore-{{db}}-p99", - "range": true, - "refId": "PromStore Write P99" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_otlp_metrics_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "otlp-metric-{{db}}-p95", - "range": true, - "refId": "OTLP Metric P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": 
"${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_otlp_metrics_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "otlp-metric-{{db}}-p99", - "range": true, - "refId": "OTLP Metric P99" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_otlp_traces_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "otlp-trace-{{db}}-p95", - "range": true, - "refId": "OTLP Trace P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_otlp_traces_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "otlp-trace-{{db}}-p99", - "range": true, - "refId": "OTLP Trace P99" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_logs_transform_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "log-transform-{{db}}-p95", - "range": true, - "refId": "Log Transform P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_logs_transform_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "log-transform-{{db}}-p99", - "range": true, - "refId": "Log Transform P99" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_http_logs_ingestion_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "log-ingest-{{db}}-p99", - "range": true, - "refId": "Log Ingest P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_http_logs_ingestion_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "log-ingest-{{db}}-p99", - "range": true, - "refId": "Log Ingest P99" - } - ], - "title": "HTTP write elapsed", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - 
"h": 7, - "w": 12, - "x": 0, - "y": 18 - }, - "id": 38, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum by(path) (rate(greptime_servers_http_requests_total[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "HTTP request rate", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 18 - }, - "id": 36, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum by(db) (rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{db}}", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Logs ingest rate (number of lines)", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum by(db) (rate(greptime_servers_http_logs_ingestion_counter[$__rate_interval]))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{db}}", + "range": true, 
+ "refId": "A", + "useBackend": false } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 25 - }, - "id": 13, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_grpc_requests_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{db}}-p95", - "range": true, - "refId": "A", - "useBackend": false + ], + "title": "Logs ingest rate (number of lines)", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_grpc_requests_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{db}}-p99", - "range": true, - "refId": "B", - "useBackend": false + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 25 + }, + "id": 13, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, db) (rate(greptime_servers_grpc_requests_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{db}}-p95", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, db) (rate(greptime_servers_grpc_requests_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{db}}-p99", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "gRPC insert elapsed", + "type": "timeseries" } ], - 
"title": "gRPC insert elapsed", - "type": "timeseries" + "title": "Protocol", + "type": "row" }, { "collapsed": false, @@ -1372,7 +1374,7 @@ "h": 1, "w": 24, "x": 0, - "y": 32 + "y": 11 }, "id": 25, "panels": [], @@ -1426,7 +1428,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1441,7 +1444,7 @@ "h": 7, "w": 12, "x": 0, - "y": 33 + "y": 12 }, "id": 1, "interval": "1s", @@ -1542,7 +1545,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1557,7 +1561,7 @@ "h": 7, "w": 12, "x": 12, - "y": 33 + "y": 12 }, "id": 7, "interval": "1s", @@ -1641,7 +1645,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1656,7 +1661,7 @@ "h": 7, "w": 12, "x": 0, - "y": 40 + "y": 19 }, "id": 3, "interval": "1s", @@ -1740,7 +1745,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1755,7 +1761,7 @@ "h": 7, "w": 12, "x": 12, - "y": 40 + "y": 19 }, "id": 11, "interval": "1s", @@ -1856,7 +1862,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1871,7 +1878,7 @@ "h": 7, "w": 12, "x": 0, - "y": 47 + "y": 26 }, "id": 15, "interval": "1s", @@ -1968,7 +1975,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -1983,10 +1991,9 @@ "h": 7, "w": 12, "x": 12, - "y": 47 + "y": 26 }, "id": 39, - "interval": "1s", "options": { "legend": { "calcs": [], @@ -2006,8 +2013,8 @@ "uid": "${DS_PROMETHEUS-1}" }, "disableTextWrap": false, - "editorMode": "code", - "expr": "idelta(greptime_mito_compaction_stage_elapsed_count{stage=\"merge\"}[5m])", + "editorMode": "builder", + "expr": "idelta(greptime_mito_compaction_stage_elapsed_count{stage=\"merge\"}[$__interval])", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, @@ -2016,233 +2023,25 @@ "refId": "A", "useBackend": false }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, type) (rate(greptime_mito_flush_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "flush-{{type}}", - "range": true, - "refId": "B" - } - ], - "title": "Flush / compaction count", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 54 - }, - "id": 9, - "interval": "1s", - "options": { - "legend": { 
- "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, "disableTextWrap": false, - "editorMode": "code", - "expr": "greptime_mito_write_buffer_bytes", + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le, type) (idelta(greptime_mito_flush_elapsed_bucket[$__interval])))", "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{instance}}", - "range": true, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "greptime_mito_memtable_dict_bytes", "hide": false, + "includeNullMetadata": true, "instant": false, - "legendFormat": "{{instance}}", - "range": true, - "refId": "B" - } - ], - "title": "Write buffer size", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green" - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 54 - }, - "id": 40, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "greptime_mito_write_stall_total", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{instance}}-worker-{{worker}}", + "legendFormat": "flush-{{type}}", "range": true, - "refId": "A", + "refId": "B", "useBackend": false } ], - "title": "Write stall count", + "title": "Flush / compaction count", "type": "timeseries" }, { @@ -2292,7 +2091,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -2308,9 +2108,9 @@ "h": 7, "w": 12, "x": 0, - "y": 61 + "y": 33 }, - "id": 41, + "id": 9, "interval": "1s", "options": { "legend": { @@ -2332,17 +2132,30 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "greptime_mito_cache_bytes", + "expr": "greptime_mito_write_buffer_bytes", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, - "legendFormat": "{{instance}}-{{type}}", + "legendFormat": "{{instance}}", "range": true, "refId": "A", "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "greptime_mito_memtable_dict_bytes", + "hide": false, + "instant": 
false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "B" } ], - "title": "Cache size", + "title": "Write buffer size", "type": "timeseries" }, { @@ -2392,15 +2205,15 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", "value": 80 } ] - }, - "unit": "percentunit" + } }, "overrides": [] }, @@ -2408,9 +2221,9 @@ "h": 7, "w": 12, "x": 12, - "y": 61 + "y": 33 }, - "id": 42, + "id": 40, "interval": "1s", "options": { "legend": { @@ -2431,33 +2244,20 @@ "uid": "${DS_PROMETHEUS-1}" }, "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(increase(greptime_mito_cache_hit[$__rate_interval])) by (instance, type) / (sum(increase(greptime_mito_cache_miss[$__rate_interval])) by (instance, type) + sum(increase(greptime_mito_cache_hit[$__rate_interval])) by (instance, type))", + "editorMode": "builder", + "expr": "rate(greptime_mito_write_stall_total[$__rate_interval])", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, - "legendFormat": "{{instance}}-{{type}}", + "legendFormat": "{{instance}}-worker-{{worker}}", "range": true, "refId": "A", "useBackend": false } ], - "title": "Cache hit", + "title": "Write stall count", "type": "timeseries" }, - { - "collapsed": false, - "gridPos": { - "h": 1, - "w": 24, - "x": 0, - "y": 68 - }, - "id": 26, - "panels": [], - "title": "Metric Engine", - "type": "row" - }, { "datasource": { "type": "prometheus", @@ -2505,14 +2305,16 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", "value": 80 } ] - } + }, + "unit": "bytes" }, "overrides": [] }, @@ -2520,9 +2322,9 @@ "h": 7, "w": 12, "x": 0, - "y": 69 + "y": 40 }, - "id": 22, + "id": 41, "interval": "1s", "options": { "legend": { @@ -2544,30 +2346,17 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, operation) (rate(greptime_metric_engine_mito_op_elapsed_bucket[$__rate_interval])))", + "expr": "greptime_mito_cache_bytes", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, - "legendFormat": "p95-{{operation}}", + "legendFormat": "{{instance}}-{{type}}", "range": true, "refId": "A", "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, operation) (rate(greptime_metric_engine_mito_op_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "p99-{{operation}}", - "range": true, - "refId": "B" } ], - "title": "Metric engine to mito R/W duration", + "title": "Cache size", "type": "timeseries" }, { @@ -2617,14 +2406,16 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", "value": 80 } ] - } + }, + "unit": "percentunit" }, "overrides": [] }, @@ -2632,9 +2423,9 @@ "h": 7, "w": 12, "x": 12, - "y": 69 + "y": 40 }, - "id": 33, + "id": 42, "interval": "1s", "options": { "legend": { @@ -2654,33 +2445,1014 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, operation) (rate(greptime_metric_engine_mito_ddl_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "p95-{{operation}}", - "range": true, - "refId": "A", - "useBackend": false + "disableTextWrap": false, + "editorMode": "code", + "expr": 
"sum(increase(greptime_mito_cache_hit[$__rate_interval])) by (instance, type) / (sum(increase(greptime_mito_cache_miss[$__rate_interval])) by (instance, type) + sum(increase(greptime_mito_cache_hit[$__rate_interval])) by (instance, type))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Cache hit", + "type": "timeseries" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 47 + }, + "id": 26, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 4 + }, + "id": 22, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, operation) (rate(greptime_metric_engine_mito_op_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "p95-{{operation}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, operation) (rate(greptime_metric_engine_mito_op_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p99-{{operation}}", + "range": true, + "refId": "B" + } + ], + "title": "Metric engine to mito R/W duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green" + }, + { + 
"color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 4 + }, + "id": 33, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, operation) (rate(greptime_metric_engine_mito_ddl_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "p95-{{operation}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, operation) (rate(greptime_metric_engine_mito_ddl_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "p99-{{label_name}}", + "range": true, + "refId": "B" + } + ], + "title": "Metric engine to mito DDL duration", + "type": "timeseries" + } + ], + "title": "Metric Engine", + "type": "row" + }, + { + "collapsed": true, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 48 + }, + "id": 21, + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 18, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "rate(opendal_bytes_total_sum[$__rate_interval])", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{scheme}}-{{operation}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "OpenDAL traffic", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, 
+ "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le, operation, schema) (rate(opendal_requests_duration_seconds_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "OpenDAL operation duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 56 + }, + "id": 43, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "greptime_object_store_lru_cache_bytes", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Object store read cache size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + 
"pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 56 + }, + "id": 44, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance) / (sum(increase(greptime_object_store_lru_cache_miss[$__rate_interval])) by (instance) + sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Object store read cache hit", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 63 + }, + "id": 10, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{logstore}}-{{optype}}-p95", + "range": true, + "refId": "Log Store P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{logstore}}-{{optype}}-p99", + "range": true, + "refId": "Log Store P99" + } + ], + "title": "Log Store op duration seconds", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + 
"mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 63 + }, + "id": 12, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "req-size-p95", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "req-size-p99", + "range": true, + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "rate(raft_engine_write_size_sum[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "throughput", + "range": true, + "refId": "B" + } + ], + "title": "WAL write size", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, operation) (rate(greptime_metric_engine_mito_ddl_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "p99-{{label_name}}", - "range": true, - "refId": "B" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + 
"y": 70 + }, + "id": 37, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, type, node) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{node}}-{{type}}-p99", + "range": true, + "refId": "Log Store P95" + } + ], + "title": "WAL sync duration seconds", + "type": "timeseries" } ], - "title": "Metric engine to mito DDL duration", - "type": "timeseries" + "title": "Storage Components", + "type": "row" }, { "collapsed": false, @@ -2688,11 +3460,11 @@ "h": 1, "w": 24, "x": 0, - "y": 76 + "y": 49 }, - "id": 21, + "id": 46, "panels": [], - "title": "Storage Components", + "title": "Index", "type": "row" }, { @@ -2742,7 +3514,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -2758,9 +3531,9 @@ "h": 7, "w": 12, "x": 0, - "y": 77 + "y": 50 }, - "id": 18, + "id": 45, "interval": "1s", "options": { "legend": { @@ -2782,18 +3555,30 @@ }, "disableTextWrap": false, "editorMode": "code", - "expr": "rate(opendal_bytes_total_sum[$__rate_interval])", + "expr": "greptime_index_create_memory_usage", "fullMetaSearch": false, - "hide": false, "includeNullMetadata": false, "instant": false, - "legendFormat": "{{scheme}}-{{operation}}", + "legendFormat": "{{instance}}-{{type}}", "range": true, - "refId": "B", + "refId": "A", "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "greptime_index_apply_memory_usage", + "hide": false, + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "B" } ], - "title": "OpenDAL traffic", + "title": "Index memory usage", "type": "timeseries" }, { @@ -2843,14 +3628,16 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", "value": 80 } ] - } + }, + "unit": "none" }, "overrides": [] }, @@ -2858,9 +3645,10 @@ "h": 7, "w": 12, "x": 12, - "y": 77 + "y": 50 }, - "id": 2, + "id": 19, + "interval": "1s", "options": { "legend": { "calcs": [], @@ -2880,18 +3668,69 @@ "uid": "${DS_PROMETHEUS-1}" }, "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le, operation, schema) (rate(opendal_requests_duration_seconds_bucket[$__rate_interval])))", + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, type) (rate(greptime_index_apply_elapsed_bucket[$__rate_interval])))", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, - "legendFormat": "__auto", + "legendFormat": "apply-{{type}}-p95", "range": true, - "refId": "A", + "refId": "Apply P95", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, type) (rate(greptime_index_apply_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "apply-{{type}}-p95", + "range": true, + "refId": "Apply P99", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, 
+ "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, type) (rate(greptime_index_create_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "create-{{type}}-p95", + "range": true, + "refId": "Create P95", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, type) (rate(greptime_index_create_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "create-{{type}}-p95", + "range": true, + "refId": "Create P99", "useBackend": false } ], - "title": "OpenDAL operation duration", + "title": "Index elapsed", "type": "timeseries" }, { @@ -2941,7 +3780,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -2956,9 +3796,9 @@ "h": 7, "w": 12, "x": 0, - "y": 84 + "y": 57 }, - "id": 10, + "id": 47, "interval": "1s", "options": { "legend": { @@ -2978,29 +3818,19 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, + "disableTextWrap": false, "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "{{logstore}}-{{optype}}-p95", - "range": true, - "refId": "Log Store P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", - "hide": false, + "expr": "rate(greptime_index_create_rows_total[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": false, "instant": false, - "legendFormat": "{{logstore}}-{{optype}}-p99", + "legendFormat": "{{type}}", "range": true, - "refId": "Log Store P99" + "refId": "A", + "useBackend": false } ], - "title": "Log Store op duration seconds", + "title": "Index create rows total", "type": "timeseries" }, { @@ -3050,7 +3880,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3058,7 +3889,7 @@ } ] }, - "unit": "none" + "unit": "bytes" }, "overrides": [] }, @@ -3066,9 +3897,9 @@ "h": 7, "w": 12, "x": 12, - "y": 84 + "y": 57 }, - "id": 19, + "id": 48, "interval": "1s", "options": { "legend": { @@ -3089,69 +3920,19 @@ "uid": "${DS_PROMETHEUS-1}" }, "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, type) (rate(greptime_index_apply_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "apply-{{type}}-p95", - "range": true, - "refId": "Apply P95", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, type) (rate(greptime_index_apply_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "apply-{{type}}-p95", - "range": true, - "refId": "Apply P99", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - 
}, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, type) (rate(greptime_index_create_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "create-{{type}}-p95", - "range": true, - "refId": "Create P95", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, type) (rate(greptime_index_create_elapsed_bucket[$__rate_interval])))", + "editorMode": "builder", + "expr": "sum by(instance, type) (rate(greptime_index_create_bytes_total[$__rate_interval]))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, - "legendFormat": "create-{{type}}-p95", + "legendFormat": "{{instance}}-{{type}}", "range": true, - "refId": "Create P99", + "refId": "B", "useBackend": false } ], - "title": "Index elapsed", + "title": "Index create bytes", "type": "timeseries" }, { @@ -3201,14 +3982,16 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", "value": 80 } ] - } + }, + "unit": "bytes" }, "overrides": [] }, @@ -3216,9 +3999,9 @@ "h": 7, "w": 12, "x": 0, - "y": 91 + "y": 64 }, - "id": 37, + "id": 49, "interval": "1s", "options": { "legend": { @@ -3238,16 +4021,20 @@ "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, type, node) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))", + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(instance, type, file_type) (rate(greptime_index_io_bytes_total[$__rate_interval]))", + "fullMetaSearch": false, "hide": false, + "includeNullMetadata": false, "instant": false, - "legendFormat": "{{node}}-{{type}}-p99", + "legendFormat": "{{instance}}-{{type}}-{{file_type}}", "range": true, - "refId": "Log Store P95" + "refId": "B", + "useBackend": false } ], - "title": "WAL sync duration seconds", + "title": "Index IO bytes", "type": "timeseries" }, { @@ -3297,7 +4084,8 @@ "mode": "absolute", "steps": [ { - "color": "green" + "color": "green", + "value": null }, { "color": "red", @@ -3305,7 +4093,7 @@ } ] }, - "unit": "bytes" + "unit": "none" }, "overrides": [] }, @@ -3313,9 +4101,9 @@ "h": 7, "w": 12, "x": 12, - "y": 91 + "y": 64 }, - "id": 12, + "id": 50, "interval": "1s", "options": { "legend": { @@ -3336,48 +4124,19 @@ "uid": "${DS_PROMETHEUS-1}" }, "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "req-size-p95", - "range": true, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", + "editorMode": "builder", + "expr": "sum by(instance, type, file_type) (rate(greptime_index_io_op_total[$__rate_interval]))", "fullMetaSearch": false, "hide": false, "includeNullMetadata": false, "instant": false, - "legendFormat": "req-size-p99", + "legendFormat": "{{instance}}-{{type}}-{{file_type}}", "range": true, - "refId": "C", + "refId": "B", "useBackend": false - }, - { 
- "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "rate(raft_engine_write_size_sum[$__rate_interval])", - "hide": false, - "instant": false, - "legendFormat": "throughput", - "range": true, - "refId": "B" } ], - "title": "WAL write size", + "title": "Index IO op", "type": "timeseries" } ], From 53d55c0b6bb9ae9b4c479904ae966bab0ea4f950 Mon Sep 17 00:00:00 2001 From: localhost Date: Fri, 13 Dec 2024 18:10:59 +0800 Subject: [PATCH 22/59] fix: loki write row len error (#5161) --- src/servers/src/http/event.rs | 4 ++-- tests-integration/tests/http.rs | 20 +++++++++++++------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/src/servers/src/http/event.rs b/src/servers/src/http/event.rs index b6b520627d66..c0926af833d6 100644 --- a/src/servers/src/http/event.rs +++ b/src/servers/src/http/event.rs @@ -514,8 +514,8 @@ pub async fn loki_ingest( let line = entry.line; // create and init row - let mut row = Vec::with_capacity(schemas.capacity()); - for _ in 0..row.capacity() { + let mut row = Vec::with_capacity(schemas.len()); + for _ in 0..schemas.len() { row.push(GreptimeValue { value_data: None }); } // insert ts and line diff --git a/tests-integration/tests/http.rs b/tests-integration/tests/http.rs index 4843b81e9142..fb2824790848 100644 --- a/tests-integration/tests/http.rs +++ b/tests-integration/tests/http.rs @@ -1816,11 +1816,17 @@ pub async fn test_loki_logs(store_type: StorageType) { // init loki request let req: PushRequest = PushRequest { streams: vec![StreamAdapter { - labels: "{service=\"test\",source=\"integration\"}".to_string(), - entries: vec![EntryAdapter { - timestamp: Some(Timestamp::from_str("2024-11-07T10:53:50").unwrap()), - line: "this is a log message".to_string(), - }], + labels: r#"{service="test",source="integration","wadaxi"="do anything"}"#.to_string(), + entries: vec![ + EntryAdapter { + timestamp: Some(Timestamp::from_str("2024-11-07T10:53:50").unwrap()), + line: "this is a log message".to_string(), + }, + EntryAdapter { + timestamp: Some(Timestamp::from_str("2024-11-07T10:53:50").unwrap()), + line: "this is a log message".to_string(), + }, + ], hash: rand::random(), }], }; @@ -1848,7 +1854,7 @@ pub async fn test_loki_logs(store_type: StorageType) { assert_eq!(StatusCode::OK, res.status()); // test schema - let expected = "[[\"loki_table_name\",\"CREATE TABLE IF NOT EXISTS \\\"loki_table_name\\\" (\\n \\\"greptime_timestamp\\\" TIMESTAMP(9) NOT NULL,\\n \\\"line\\\" STRING NULL,\\n \\\"service\\\" STRING NULL,\\n \\\"source\\\" STRING NULL,\\n TIME INDEX (\\\"greptime_timestamp\\\"),\\n PRIMARY KEY (\\\"service\\\", \\\"source\\\")\\n)\\n\\nENGINE=mito\\nWITH(\\n append_mode = 'true'\\n)\"]]"; + let expected = "[[\"loki_table_name\",\"CREATE TABLE IF NOT EXISTS \\\"loki_table_name\\\" (\\n \\\"greptime_timestamp\\\" TIMESTAMP(9) NOT NULL,\\n \\\"line\\\" STRING NULL,\\n \\\"service\\\" STRING NULL,\\n \\\"source\\\" STRING NULL,\\n \\\"wadaxi\\\" STRING NULL,\\n TIME INDEX (\\\"greptime_timestamp\\\"),\\n PRIMARY KEY (\\\"service\\\", \\\"source\\\", \\\"wadaxi\\\")\\n)\\n\\nENGINE=mito\\nWITH(\\n append_mode = 'true'\\n)\"]]"; validate_data( "loki_schema", &client, @@ -1858,7 +1864,7 @@ pub async fn test_loki_logs(store_type: StorageType) { .await; // test content - let expected = r#"[[1730976830000000000,"this is a log message","test","integration"]]"#; + let expected = r#"[[1730976830000000000,"this is a log message","test","integration","do anything"],[1730976830000000000,"this is a log 
message","test","integration","do anything"]]"#; validate_data( "loki_content", &client, From 579059d99f485f31e242f089ffccf6c88ce6520b Mon Sep 17 00:00:00 2001 From: Yingwen Date: Fri, 13 Dec 2024 20:53:11 +0800 Subject: [PATCH 23/59] ci: use 4xlarge for nightly build (#5158) --- .github/workflows/nightly-build.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/nightly-build.yml b/.github/workflows/nightly-build.yml index 09fcc5c26eba..afe01f11ec27 100644 --- a/.github/workflows/nightly-build.yml +++ b/.github/workflows/nightly-build.yml @@ -12,7 +12,7 @@ on: linux_amd64_runner: type: choice description: The runner uses to build linux-amd64 artifacts - default: ec2-c6i.2xlarge-amd64 + default: ec2-c6i.4xlarge-amd64 options: - ubuntu-20.04 - ubuntu-20.04-8-cores @@ -27,7 +27,7 @@ on: linux_arm64_runner: type: choice description: The runner uses to build linux-arm64 artifacts - default: ec2-c6g.2xlarge-arm64 + default: ec2-c6g.4xlarge-arm64 options: - ec2-c6g.xlarge-arm64 # 4C8G - ec2-c6g.2xlarge-arm64 # 8C16G From 358d5e1d63cc28cee30fe3b213fef26f5fb71aac Mon Sep 17 00:00:00 2001 From: Niwaka <61189782+NiwakaDev@users.noreply.github.com> Date: Sun, 15 Dec 2024 18:05:29 +0900 Subject: [PATCH 24/59] fix: support alter table ~ add ~ custom_type (#5165) --- .../src/statements/transform/type_alias.rs | 4 ++++ .../common/alter/alter_table.result | 19 +++++++++++++------ .../standalone/common/alter/alter_table.sql | 7 ++++++- 3 files changed, 23 insertions(+), 7 deletions(-) diff --git a/src/sql/src/statements/transform/type_alias.rs b/src/sql/src/statements/transform/type_alias.rs index 9e51ca918041..d670a63b47ed 100644 --- a/src/sql/src/statements/transform/type_alias.rs +++ b/src/sql/src/statements/transform/type_alias.rs @@ -57,6 +57,10 @@ impl TransformRule for TypeAliasTransformRule { alter_table.alter_operation_mut() { replace_type_alias(target_type) + } else if let AlterTableOperation::AddColumn { column_def, .. 
} = + alter_table.alter_operation_mut() + { + replace_type_alias(&mut column_def.data_type); } } _ => {} diff --git a/tests/cases/standalone/common/alter/alter_table.result b/tests/cases/standalone/common/alter/alter_table.result index 120e7695d03f..5c1dbfca77a8 100644 --- a/tests/cases/standalone/common/alter/alter_table.result +++ b/tests/cases/standalone/common/alter/alter_table.result @@ -140,10 +140,17 @@ ADD Affected Rows: 0 +ALTER TABLE + t2 +ADD + COLUMN at4 UINT16; + +Affected Rows: 0 + INSERT INTO t2 VALUES - ("loc_1", "loc_2", "loc_3", 'job1', 0, 1); + ("loc_1", "loc_2", "loc_3", 2, 'job1', 0, 1); Affected Rows: 1 @@ -152,11 +159,11 @@ SELECT FROM t2; -+-------+-------+-------+------+---------------------+-----+ -| at | at2 | at3 | job | ts | val | -+-------+-------+-------+------+---------------------+-----+ -| loc_1 | loc_2 | loc_3 | job1 | 1970-01-01T00:00:00 | 1.0 | -+-------+-------+-------+------+---------------------+-----+ ++-------+-------+-------+-----+------+---------------------+-----+ +| at | at2 | at3 | at4 | job | ts | val | ++-------+-------+-------+-----+------+---------------------+-----+ +| loc_1 | loc_2 | loc_3 | 2 | job1 | 1970-01-01T00:00:00 | 1.0 | ++-------+-------+-------+-----+------+---------------------+-----+ DROP TABLE t1; diff --git a/tests/cases/standalone/common/alter/alter_table.sql b/tests/cases/standalone/common/alter/alter_table.sql index 7f3e0b664038..c52a2445db42 100644 --- a/tests/cases/standalone/common/alter/alter_table.sql +++ b/tests/cases/standalone/common/alter/alter_table.sql @@ -67,10 +67,15 @@ ALTER TABLE ADD COLUMN at2 STRING; +ALTER TABLE + t2 +ADD + COLUMN at4 UINT16; + INSERT INTO t2 VALUES - ("loc_1", "loc_2", "loc_3", 'job1', 0, 1); + ("loc_1", "loc_2", "loc_3", 2, 'job1', 0, 1); SELECT * From 9d7fea902e6a87d0decf3b96f51e81c2c1569e73 Mon Sep 17 00:00:00 2001 From: shuiyisong <113876041+shuiyisong@users.noreply.github.com> Date: Mon, 16 Dec 2024 14:17:27 +0800 Subject: [PATCH 25/59] chore: remove unused dep (#5163) * chore: remove unused dep * chore: remove more unused dep --- Cargo.lock | 179 +------------------------------ src/cache/Cargo.toml | 1 - src/catalog/Cargo.toml | 3 - src/cli/Cargo.toml | 2 - src/client/Cargo.toml | 2 - src/common/catalog/Cargo.toml | 5 - src/common/datasource/Cargo.toml | 1 - src/common/frontend/Cargo.toml | 5 - src/common/function/Cargo.toml | 1 - src/common/runtime/Cargo.toml | 2 - src/file-engine/Cargo.toml | 1 - src/flow/Cargo.toml | 1 - src/frontend/Cargo.toml | 2 - src/metric-engine/Cargo.toml | 1 - src/mito2/Cargo.toml | 2 - src/pipeline/Cargo.toml | 1 - src/promql/Cargo.toml | 3 - src/query/Cargo.toml | 4 - src/script/Cargo.toml | 2 - src/servers/Cargo.toml | 1 - src/store-api/Cargo.toml | 1 - 21 files changed, 2 insertions(+), 218 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index b60615c8e54c..df817dc201cd 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -222,26 +222,6 @@ dependencies = [ "num-traits", ] -[[package]] -name = "approx_eq" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3f9eb837c6a783fbf002e3e5cc7925a3aa6893d6d42f9169517528983777590" - -[[package]] -name = "aquamarine" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1da02abba9f9063d786eab1509833ebb2fac0f966862ca59439c76b9c566760" -dependencies = [ - "include_dir", - "itertools 0.10.5", - "proc-macro-error", - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "arbitrary" version = "1.3.2" @@ -1310,7 +1290,6 
@@ dependencies = [ "common-meta", "moka", "snafu 0.8.5", - "substrait 0.12.0", ] [[package]] @@ -1349,7 +1328,6 @@ dependencies = [ "catalog", "chrono", "common-catalog", - "common-config", "common-error", "common-macro", "common-meta", @@ -1358,7 +1336,6 @@ dependencies = [ "common-recordbatch", "common-runtime", "common-telemetry", - "common-test-util", "common-time", "common-version", "dashmap", @@ -1369,7 +1346,6 @@ dependencies = [ "humantime", "itertools 0.10.5", "lazy_static", - "log-store", "meta-client", "moka", "object-store", @@ -1693,7 +1669,6 @@ dependencies = [ "common-grpc", "common-macro", "common-meta", - "common-options", "common-procedure", "common-query", "common-recordbatch", @@ -1722,7 +1697,6 @@ dependencies = [ "store-api", "substrait 0.12.0", "table", - "temp-env", "tempfile", "tokio", "tracing-appender", @@ -1746,8 +1720,6 @@ dependencies = [ "common-query", "common-recordbatch", "common-telemetry", - "datanode", - "derive-new 0.5.9", "enum_dispatch", "futures-util", "lazy_static", @@ -1928,13 +1900,6 @@ dependencies = [ [[package]] name = "common-catalog" version = "0.12.0" -dependencies = [ - "chrono", - "common-error", - "common-macro", - "snafu 0.8.5", - "tokio", -] [[package]] name = "common-config" @@ -1978,7 +1943,6 @@ dependencies = [ "datafusion", "datatypes", "derive_builder 0.12.0", - "dotenv", "futures", "lazy_static", "object-store", @@ -2022,15 +1986,10 @@ dependencies = [ name = "common-frontend" version = "0.12.0" dependencies = [ - "api", "async-trait", - "common-base", "common-error", "common-macro", - "common-query", - "session", "snafu 0.8.5", - "sql", ] [[package]] @@ -2064,7 +2023,6 @@ dependencies = [ "num-traits", "once_cell", "paste", - "ron", "s2", "serde", "serde_json", @@ -2353,8 +2311,6 @@ dependencies = [ "snafu 0.8.5", "tempfile", "tokio", - "tokio-metrics", - "tokio-metrics-collector", "tokio-test", "tokio-util", ] @@ -2834,16 +2790,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "ctor" -version = "0.1.26" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d2301688392eb071b0bf1a37be05c469d3cc4dbbd95df672fe28ab021e6a096" -dependencies = [ - "quote", - "syn 1.0.109", -] - [[package]] name = "darling" version = "0.14.4" @@ -3386,17 +3332,6 @@ dependencies = [ "syn 1.0.109", ] -[[package]] -name = "derive-new" -version = "0.5.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3418329ca0ad70234b9735dc4ceed10af4df60eff9c8e7b06cb5e520d92c3535" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", -] - [[package]] name = "derive-new" version = "0.7.0" @@ -3919,7 +3854,6 @@ dependencies = [ "common-error", "common-macro", "common-procedure", - "common-procedure-test", "common-query", "common-recordbatch", "common-telemetry", @@ -4067,7 +4001,6 @@ dependencies = [ "itertools 0.10.5", "lazy_static", "meta-client", - "minstant", "nom", "num-traits", "operator", @@ -4114,15 +4047,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "format_num" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14ac05eb8d2eb4ed1eeff847911deae077b0b53332465de9d6a26b0ea9961bc8" -dependencies = [ - "regex", -] - [[package]] name = "fragile" version = "2.0.0" @@ -4145,7 +4069,6 @@ dependencies = [ "common-config", "common-datasource", "common-error", - "common-frontend", "common-function", "common-grpc", "common-macro", @@ -4167,7 +4090,6 @@ dependencies = [ "lazy_static", "log-store", "meta-client", - "meta-srv", 
"opentelemetry-proto 0.5.0", "operator", "partition", @@ -5244,25 +5166,6 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed" -[[package]] -name = "include_dir" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "923d117408f1e49d914f1a379a309cffe4f18c05cf4e3d12e613a15fc81bd0dd" -dependencies = [ - "include_dir_macros", -] - -[[package]] -name = "include_dir_macros" -version = "0.7.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7cab85a7ed0bd5f0e76d93846e0147172bed2e2d3f859bcc33a8d9699cad1a75" -dependencies = [ - "proc-macro2", - "quote", -] - [[package]] name = "index" version = "0.12.0" @@ -6535,7 +6438,6 @@ name = "metric-engine" version = "0.12.0" dependencies = [ "api", - "aquamarine", "async-trait", "base64 0.21.7", "common-base", @@ -6600,16 +6502,6 @@ dependencies = [ "adler2", ] -[[package]] -name = "minstant" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fb9b5c752f145ac5046bccc3c4f62892e3c950c1d1eab80c5949cd68a2078db" -dependencies = [ - "ctor", - "web-time 1.1.0", -] - [[package]] name = "mio" version = "0.8.11" @@ -6639,7 +6531,6 @@ name = "mito2" version = "0.12.0" dependencies = [ "api", - "aquamarine", "async-channel 1.9.0", "async-stream", "async-trait", @@ -6653,7 +6544,6 @@ dependencies = [ "common-function", "common-macro", "common-meta", - "common-procedure-test", "common-query", "common-recordbatch", "common-runtime", @@ -8090,7 +7980,7 @@ dependencies = [ "async-trait", "bytes", "chrono", - "derive-new 0.7.0", + "derive-new", "futures", "hex", "lazy-regex", @@ -8230,7 +8120,6 @@ dependencies = [ "query", "rayon", "regex", - "ron", "serde", "serde_json", "session", @@ -8642,10 +8531,7 @@ dependencies = [ "greptime-proto", "lazy_static", "prometheus", - "promql-parser", "prost 0.12.6", - "query", - "session", "snafu 0.8.5", "tokio", ] @@ -8993,7 +8879,6 @@ version = "0.12.0" dependencies = [ "ahash 0.8.11", "api", - "approx_eq", "arc-swap", "arrow", "arrow-schema", @@ -9025,7 +8910,6 @@ dependencies = [ "datafusion-sql", "datatypes", "fastrand", - "format_num", "futures", "futures-util", "greptime-proto", @@ -9053,9 +8937,7 @@ dependencies = [ "sql", "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", "statrs", - "stats-cli", "store-api", - "streaming-stats", "substrait 0.12.0", "table", "tokio", @@ -10546,7 +10428,6 @@ dependencies = [ "datatypes", "futures", "lazy_static", - "log-store", "once_cell", "operator", "paste", @@ -10569,7 +10450,6 @@ dependencies = [ "sql", "table", "tokio", - "tokio-test", ] [[package]] @@ -10911,7 +10791,6 @@ dependencies = [ "tokio-postgres-rustls", "tokio-rustls 0.26.0", "tokio-stream", - "tokio-test", "tokio-util", "tonic 0.11.0", "tonic-reflection", @@ -11545,22 +11424,11 @@ dependencies = [ "rand", ] -[[package]] -name = "stats-cli" -version = "3.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8786c4fc8a91bc4fcd90aed33413f79e4dc9811f24ba14d1d59adf57cf1c871" -dependencies = [ - "clap 2.34.0", - "num-traits", -] - [[package]] name = "store-api" version = "0.12.0" dependencies = [ "api", - "aquamarine", "async-stream", "async-trait", "common-base", @@ -11596,15 +11464,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"9091b6114800a5f2141aee1d1b9d6ca3592ac062dc5decb3764ec5895a47b4eb" -[[package]] -name = "streaming-stats" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0d670ce4e348a2081843569e0f79b21c99c91bb9028b3b3ecb0f050306de547" -dependencies = [ - "num-traits", -] - [[package]] name = "strfmt" version = "0.2.4" @@ -12562,30 +12421,6 @@ dependencies = [ "syn 2.0.90", ] -[[package]] -name = "tokio-metrics" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eace09241d62c98b7eeb1107d4c5c64ca3bd7da92e8c218c153ab3a78f9be112" -dependencies = [ - "futures-util", - "pin-project-lite", - "tokio", - "tokio-stream", -] - -[[package]] -name = "tokio-metrics-collector" -version = "0.2.1" -source = "git+https://github.com/MichaelScofield/tokio-metrics-collector.git?rev=89d692d5753d28564a7aac73c6ac5aba22243ba0#89d692d5753d28564a7aac73c6ac5aba22243ba0" -dependencies = [ - "lazy_static", - "parking_lot 0.12.3", - "prometheus", - "tokio", - "tokio-metrics", -] - [[package]] name = "tokio-postgres" version = "0.7.12" @@ -13012,7 +12847,7 @@ dependencies = [ "tracing-core", "tracing-log 0.2.0", "tracing-subscriber", - "web-time 0.2.4", + "web-time", ] [[package]] @@ -13783,16 +13618,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "web-time" -version = "1.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb" -dependencies = [ - "js-sys", - "wasm-bindgen", -] - [[package]] name = "webbrowser" version = "0.8.15" diff --git a/src/cache/Cargo.toml b/src/cache/Cargo.toml index 9a2888e5fc13..07870fa904a5 100644 --- a/src/cache/Cargo.toml +++ b/src/cache/Cargo.toml @@ -11,4 +11,3 @@ common-macro.workspace = true common-meta.workspace = true moka.workspace = true snafu.workspace = true -substrait.workspace = true diff --git a/src/catalog/Cargo.toml b/src/catalog/Cargo.toml index a5ad92e8917c..b7e19a44b9ff 100644 --- a/src/catalog/Cargo.toml +++ b/src/catalog/Cargo.toml @@ -18,7 +18,6 @@ async-stream.workspace = true async-trait = "0.1" bytes.workspace = true common-catalog.workspace = true -common-config.workspace = true common-error.workspace = true common-macro.workspace = true common-meta.workspace = true @@ -58,7 +57,5 @@ catalog = { workspace = true, features = ["testing"] } chrono.workspace = true common-meta = { workspace = true, features = ["testing"] } common-query = { workspace = true, features = ["testing"] } -common-test-util.workspace = true -log-store.workspace = true object-store.workspace = true tokio.workspace = true diff --git a/src/cli/Cargo.toml b/src/cli/Cargo.toml index b49aa00ee2cc..de2abc15f18e 100644 --- a/src/cli/Cargo.toml +++ b/src/cli/Cargo.toml @@ -23,7 +23,6 @@ common-error.workspace = true common-grpc.workspace = true common-macro.workspace = true common-meta.workspace = true -common-options.workspace = true common-procedure.workspace = true common-query.workspace = true common-recordbatch.workspace = true @@ -61,5 +60,4 @@ client = { workspace = true, features = ["testing"] } common-test-util.workspace = true common-version.workspace = true serde.workspace = true -temp-env = "0.3" tempfile.workspace = true diff --git a/src/client/Cargo.toml b/src/client/Cargo.toml index 9d198ab9fbae..f8702fe6ac16 100644 --- a/src/client/Cargo.toml +++ b/src/client/Cargo.toml @@ -42,8 +42,6 @@ tonic.workspace = true [dev-dependencies] common-grpc-expr.workspace = true -datanode.workspace = true -derive-new = 
"0.5" tracing = "0.1" [dev-dependencies.substrait_proto] diff --git a/src/common/catalog/Cargo.toml b/src/common/catalog/Cargo.toml index 61f49ab0e4b3..051675fe93e1 100644 --- a/src/common/catalog/Cargo.toml +++ b/src/common/catalog/Cargo.toml @@ -8,10 +8,5 @@ license.workspace = true workspace = true [dependencies] -common-error.workspace = true -common-macro.workspace = true -snafu.workspace = true [dev-dependencies] -chrono.workspace = true -tokio.workspace = true diff --git a/src/common/datasource/Cargo.toml b/src/common/datasource/Cargo.toml index 65f1d18a6603..16137e6b3e51 100644 --- a/src/common/datasource/Cargo.toml +++ b/src/common/datasource/Cargo.toml @@ -48,5 +48,4 @@ url = "2.3" [dev-dependencies] common-telemetry.workspace = true common-test-util.workspace = true -dotenv.workspace = true uuid.workspace = true diff --git a/src/common/frontend/Cargo.toml b/src/common/frontend/Cargo.toml index 2aa111fa1af0..7c3b705bddcd 100644 --- a/src/common/frontend/Cargo.toml +++ b/src/common/frontend/Cargo.toml @@ -5,12 +5,7 @@ edition.workspace = true license.workspace = true [dependencies] -api.workspace = true async-trait.workspace = true -common-base.workspace = true common-error.workspace = true common-macro.workspace = true -common-query.workspace = true -session.workspace = true snafu.workspace = true -sql.workspace = true diff --git a/src/common/function/Cargo.toml b/src/common/function/Cargo.toml index 29cefb1e7547..e7cc25ca1325 100644 --- a/src/common/function/Cargo.toml +++ b/src/common/function/Cargo.toml @@ -51,6 +51,5 @@ wkt = { version = "0.11", optional = true } [dev-dependencies] approx = "0.5" -ron = "0.7" serde = { version = "1.0", features = ["derive"] } tokio.workspace = true diff --git a/src/common/runtime/Cargo.toml b/src/common/runtime/Cargo.toml index c249ba221ecd..7a12a03ba9cd 100644 --- a/src/common/runtime/Cargo.toml +++ b/src/common/runtime/Cargo.toml @@ -35,8 +35,6 @@ serde_json.workspace = true snafu.workspace = true tempfile.workspace = true tokio.workspace = true -tokio-metrics = "0.3" -tokio-metrics-collector = { git = "https://github.com/MichaelScofield/tokio-metrics-collector.git", rev = "89d692d5753d28564a7aac73c6ac5aba22243ba0" } tokio-util.workspace = true [dev-dependencies] diff --git a/src/file-engine/Cargo.toml b/src/file-engine/Cargo.toml index f9cd1113f535..1a665d667607 100644 --- a/src/file-engine/Cargo.toml +++ b/src/file-engine/Cargo.toml @@ -38,5 +38,4 @@ tokio.workspace = true [dev-dependencies] api.workspace = true -common-procedure-test.workspace = true common-test-util.workspace = true diff --git a/src/flow/Cargo.toml b/src/flow/Cargo.toml index ed2a1dc1c474..ffba0618daaf 100644 --- a/src/flow/Cargo.toml +++ b/src/flow/Cargo.toml @@ -47,7 +47,6 @@ hydroflow = { git = "https://github.com/GreptimeTeam/hydroflow.git", branch = "m itertools.workspace = true lazy_static.workspace = true meta-client.workspace = true -minstant = "0.1.7" nom = "7.1.3" num-traits = "0.2" operator.workspace = true diff --git a/src/frontend/Cargo.toml b/src/frontend/Cargo.toml index 01f06eb03338..e21819c568f2 100644 --- a/src/frontend/Cargo.toml +++ b/src/frontend/Cargo.toml @@ -25,7 +25,6 @@ common-catalog.workspace = true common-config.workspace = true common-datasource.workspace = true common-error.workspace = true -common-frontend.workspace = true common-function.workspace = true common-grpc.workspace = true common-macro.workspace = true @@ -71,7 +70,6 @@ common-test-util.workspace = true datanode.workspace = true datatypes.workspace = true futures = "0.3" 
-meta-srv = { workspace = true, features = ["mock"] } serde_json.workspace = true strfmt = "0.2" tower.workspace = true diff --git a/src/metric-engine/Cargo.toml b/src/metric-engine/Cargo.toml index 85aa371594e8..666ac09faa75 100644 --- a/src/metric-engine/Cargo.toml +++ b/src/metric-engine/Cargo.toml @@ -9,7 +9,6 @@ workspace = true [dependencies] api.workspace = true -aquamarine.workspace = true async-trait.workspace = true base64.workspace = true common-base.workspace = true diff --git a/src/mito2/Cargo.toml b/src/mito2/Cargo.toml index eecb79440a2e..181ba0f43407 100644 --- a/src/mito2/Cargo.toml +++ b/src/mito2/Cargo.toml @@ -13,7 +13,6 @@ workspace = true [dependencies] api.workspace = true -aquamarine.workspace = true async-channel = "1.9" async-stream.workspace = true async-trait = "0.1" @@ -77,7 +76,6 @@ uuid.workspace = true [dev-dependencies] common-function.workspace = true common-meta = { workspace = true, features = ["testing"] } -common-procedure-test.workspace = true common-test-util.workspace = true criterion = "0.4" dotenv.workspace = true diff --git a/src/pipeline/Cargo.toml b/src/pipeline/Cargo.toml index 4657f39a6866..9c26d1a52fa6 100644 --- a/src/pipeline/Cargo.toml +++ b/src/pipeline/Cargo.toml @@ -63,7 +63,6 @@ yaml-rust = "0.4" catalog = { workspace = true, features = ["testing"] } criterion = { version = "0.4", features = ["html_reports"] } rayon = "1.0" -ron = "0.7" serde = { version = "1.0", features = ["derive"] } session = { workspace = true, features = ["testing"] } diff --git a/src/promql/Cargo.toml b/src/promql/Cargo.toml index 4039328528c3..7b51651a7ca9 100644 --- a/src/promql/Cargo.toml +++ b/src/promql/Cargo.toml @@ -22,11 +22,8 @@ futures = "0.3" greptime-proto.workspace = true lazy_static.workspace = true prometheus.workspace = true -promql-parser.workspace = true prost.workspace = true snafu.workspace = true [dev-dependencies] -query.workspace = true -session = { workspace = true, features = ["testing"] } tokio.workspace = true diff --git a/src/query/Cargo.toml b/src/query/Cargo.toml index 863a5a1c33d3..8139ea3aafbb 100644 --- a/src/query/Cargo.toml +++ b/src/query/Cargo.toml @@ -67,13 +67,11 @@ tokio.workspace = true uuid.workspace = true [dev-dependencies] -approx_eq = "0.1" arrow.workspace = true catalog = { workspace = true, features = ["testing"] } common-macro.workspace = true common-query = { workspace = true, features = ["testing"] } fastrand = "2.0" -format_num = "0.1" num = "0.4" num-traits = "0.2" paste = "1.0" @@ -83,8 +81,6 @@ serde.workspace = true serde_json.workspace = true session = { workspace = true, features = ["testing"] } statrs = "0.16" -stats-cli = "3.0" store-api.workspace = true -streaming-stats = "0.2" table = { workspace = true, features = ["testing"] } tokio-stream.workspace = true diff --git a/src/script/Cargo.toml b/src/script/Cargo.toml index 88d10c9509c7..136eb3c4fc4c 100644 --- a/src/script/Cargo.toml +++ b/src/script/Cargo.toml @@ -80,13 +80,11 @@ tokio.workspace = true catalog = { workspace = true, features = ["testing"] } common-test-util.workspace = true criterion = { version = "0.4", features = ["html_reports", "async_tokio"] } -log-store.workspace = true operator.workspace = true rayon = "1.0" ron = "0.7" serde = { version = "1.0", features = ["derive"] } session = { workspace = true, features = ["testing"] } -tokio-test = "0.4" [[bench]] name = "py_benchmark" diff --git a/src/servers/Cargo.toml b/src/servers/Cargo.toml index ddfeaf27bd45..a90fb880e20d 100644 --- a/src/servers/Cargo.toml +++ 
b/src/servers/Cargo.toml @@ -134,7 +134,6 @@ table.workspace = true tempfile = "3.0.0" tokio-postgres = "0.7" tokio-postgres-rustls = "0.12" -tokio-test = "0.4" [target.'cfg(unix)'.dev-dependencies] pprof = { version = "0.13", features = ["criterion", "flamegraph"] } diff --git a/src/store-api/Cargo.toml b/src/store-api/Cargo.toml index 7c974661e315..1214ae3d4001 100644 --- a/src/store-api/Cargo.toml +++ b/src/store-api/Cargo.toml @@ -9,7 +9,6 @@ workspace = true [dependencies] api.workspace = true -aquamarine.workspace = true async-trait.workspace = true common-base.workspace = true common-error.workspace = true From f82af15eba627bfc175b4ccb23c3a5790d016905 Mon Sep 17 00:00:00 2001 From: Lin Yihai Date: Mon, 16 Dec 2024 14:46:38 +0800 Subject: [PATCH 26/59] feat: Add `vector_scalar_mul` function. (#5166) --- src/common/function/src/scalars/vector.rs | 2 + .../function/src/scalars/vector/scalar_mul.rs | 173 ++++++++++++++++++ .../function/vector/vector_scalar.result | 48 +++++ .../common/function/vector/vector_scalar.sql | 12 ++ 4 files changed, 235 insertions(+) create mode 100644 src/common/function/src/scalars/vector/scalar_mul.rs diff --git a/src/common/function/src/scalars/vector.rs b/src/common/function/src/scalars/vector.rs index 0c0428ce9a45..d462b917af59 100644 --- a/src/common/function/src/scalars/vector.rs +++ b/src/common/function/src/scalars/vector.rs @@ -16,6 +16,7 @@ mod convert; mod distance; pub(crate) mod impl_conv; mod scalar_add; +mod scalar_mul; use std::sync::Arc; @@ -36,5 +37,6 @@ impl VectorFunction { // scalar calculation registry.register(Arc::new(scalar_add::ScalarAddFunction)); + registry.register(Arc::new(scalar_mul::ScalarMulFunction)); } } diff --git a/src/common/function/src/scalars/vector/scalar_mul.rs b/src/common/function/src/scalars/vector/scalar_mul.rs new file mode 100644 index 000000000000..3c7fe4c07035 --- /dev/null +++ b/src/common/function/src/scalars/vector/scalar_mul.rs @@ -0,0 +1,173 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::borrow::Cow; +use std::fmt::Display; + +use common_query::error::{InvalidFuncArgsSnafu, Result}; +use common_query::prelude::Signature; +use datatypes::prelude::ConcreteDataType; +use datatypes::scalars::ScalarVectorBuilder; +use datatypes::vectors::{BinaryVectorBuilder, MutableVector, VectorRef}; +use nalgebra::DVectorView; +use snafu::ensure; + +use crate::function::{Function, FunctionContext}; +use crate::helper; +use crate::scalars::vector::impl_conv::{as_veclit, as_veclit_if_const, veclit_to_binlit}; + +const NAME: &str = "vec_scalar_mul"; + +/// Multiplies each element of a vector by a scalar.
+/// +/// # Example +/// +/// ```sql +/// SELECT vec_to_string(vec_scalar_mul(2, "[1, 2, 3]")) as result; +/// +/// +---------+ +/// | result | +/// +---------+ +/// | [2,4,6] | +/// +---------+ +/// +/// -- 1/scalar to simulate division +/// SELECT vec_to_string(vec_scalar_mul(0.5, "[2, 4, 6]")) as result; +/// +/// +---------+ +/// | result | +/// +---------+ +/// | [1,2,3] | +/// +---------+ +/// ``` +#[derive(Debug, Clone, Default)] +pub struct ScalarMulFunction; + +impl Function for ScalarMulFunction { + fn name(&self) -> &str { + NAME + } + + fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result { + Ok(ConcreteDataType::binary_datatype()) + } + + fn signature(&self) -> Signature { + helper::one_of_sigs2( + vec![ConcreteDataType::float64_datatype()], + vec![ + ConcreteDataType::string_datatype(), + ConcreteDataType::binary_datatype(), + ], + ) + } + + fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result { + ensure!( + columns.len() == 2, + InvalidFuncArgsSnafu { + err_msg: format!( + "The length of the args is not correct, expect exactly two, have: {}", + columns.len() + ), + } + ); + let arg0 = &columns[0]; + let arg1 = &columns[1]; + + let len = arg0.len(); + let mut result = BinaryVectorBuilder::with_capacity(len); + if len == 0 { + return Ok(result.to_vector()); + } + + let arg1_const = as_veclit_if_const(arg1)?; + + for i in 0..len { + let arg0 = arg0.get(i).as_f64_lossy(); + let Some(arg0) = arg0 else { + result.push_null(); + continue; + }; + + let arg1 = match arg1_const.as_ref() { + Some(arg1) => Some(Cow::Borrowed(arg1.as_ref())), + None => as_veclit(arg1.get_ref(i))?, + }; + let Some(arg1) = arg1 else { + result.push_null(); + continue; + }; + + let vec = DVectorView::from_slice(&arg1, arg1.len()); + let vec_res = vec.scale(arg0 as _); + + let veclit = vec_res.as_slice(); + let binlit = veclit_to_binlit(veclit); + result.push(Some(&binlit)); + } + + Ok(result.to_vector()) + } +} + +impl Display for ScalarMulFunction { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", NAME.to_ascii_uppercase()) + } +} + +#[cfg(test)] +mod tests { + use std::sync::Arc; + + use datatypes::vectors::{Float32Vector, StringVector}; + + use super::*; + + #[test] + fn test_scalar_mul() { + let func = ScalarMulFunction; + + let input0 = Arc::new(Float32Vector::from(vec![ + Some(2.0), + Some(-0.5), + None, + Some(3.0), + ])); + let input1 = Arc::new(StringVector::from(vec![ + Some("[1.0,2.0,3.0]".to_string()), + Some("[8.0,10.0,12.0]".to_string()), + Some("[7.0,8.0,9.0]".to_string()), + None, + ])); + + let result = func + .eval(FunctionContext::default(), &[input0, input1]) + .unwrap(); + + let result = result.as_ref(); + assert_eq!(result.len(), 4); + assert_eq!( + result.get_ref(0).as_binary().unwrap(), + Some(veclit_to_binlit(&[2.0, 4.0, 6.0]).as_slice()) + ); + assert_eq!( + result.get_ref(1).as_binary().unwrap(), + Some(veclit_to_binlit(&[-4.0, -5.0, -6.0]).as_slice()) + ); + assert!(result.get_ref(2).is_null()); + assert!(result.get_ref(3).is_null()); + } +} diff --git a/tests/cases/standalone/common/function/vector/vector_scalar.result b/tests/cases/standalone/common/function/vector/vector_scalar.result index 5750a0adfdb8..a379c385fa24 100644 --- a/tests/cases/standalone/common/function/vector/vector_scalar.result +++ b/tests/cases/standalone/common/function/vector/vector_scalar.result @@ -46,3 +46,51 @@ SELECT vec_to_string(vec_scalar_add(-1, '[1.0, 2.0]')); | [0,1] | 
+-------------------------------------------------------------+ +SELECT vec_to_string(vec_scalar_mul(1.0, '[1.0, 2.0]')); + ++--------------------------------------------------------------+ +| vec_to_string(vec_scalar_mul(Float64(1),Utf8("[1.0, 2.0]"))) | ++--------------------------------------------------------------+ +| [1,2] | ++--------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_mul(-0.5, '[2.0, 4.0]')); + ++-----------------------------------------------------------------+ +| vec_to_string(vec_scalar_mul(Float64(-0.5),Utf8("[2.0, 4.0]"))) | ++-----------------------------------------------------------------+ +| [-1,-2] | ++-----------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_mul(1.0, parse_vec('[1.0, 2.0]'))); + ++-------------------------------------------------------------------------+ +| vec_to_string(vec_scalar_mul(Float64(1),parse_vec(Utf8("[1.0, 2.0]")))) | ++-------------------------------------------------------------------------+ +| [1,2] | ++-------------------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_mul(-0.5, parse_vec('[2.0, 4.0]'))); + ++----------------------------------------------------------------------------+ +| vec_to_string(vec_scalar_mul(Float64(-0.5),parse_vec(Utf8("[2.0, 4.0]")))) | ++----------------------------------------------------------------------------+ +| [-1,-2] | ++----------------------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_mul(1, '[1.0, 2.0]')); + ++------------------------------------------------------------+ +| vec_to_string(vec_scalar_mul(Int64(1),Utf8("[1.0, 2.0]"))) | ++------------------------------------------------------------+ +| [1,2] | ++------------------------------------------------------------+ + +SELECT vec_to_string(vec_scalar_mul(-0.5, '[2.0, 4.0]')); + ++-----------------------------------------------------------------+ +| vec_to_string(vec_scalar_mul(Float64(-0.5),Utf8("[2.0, 4.0]"))) | ++-----------------------------------------------------------------+ +| [-1,-2] | ++-----------------------------------------------------------------+ + diff --git a/tests/cases/standalone/common/function/vector/vector_scalar.sql b/tests/cases/standalone/common/function/vector/vector_scalar.sql index e438ac6a40ba..2727f2970507 100644 --- a/tests/cases/standalone/common/function/vector/vector_scalar.sql +++ b/tests/cases/standalone/common/function/vector/vector_scalar.sql @@ -9,3 +9,15 @@ SELECT vec_to_string(vec_scalar_add(-1.0, parse_vec('[1.0, 2.0]'))); SELECT vec_to_string(vec_scalar_add(1, '[1.0, 2.0]')); SELECT vec_to_string(vec_scalar_add(-1, '[1.0, 2.0]')); + +SELECT vec_to_string(vec_scalar_mul(1.0, '[1.0, 2.0]')); + +SELECT vec_to_string(vec_scalar_mul(-0.5, '[2.0, 4.0]')); + +SELECT vec_to_string(vec_scalar_mul(1.0, parse_vec('[1.0, 2.0]'))); + +SELECT vec_to_string(vec_scalar_mul(-0.5, parse_vec('[2.0, 4.0]'))); + +SELECT vec_to_string(vec_scalar_mul(1, '[1.0, 2.0]')); + +SELECT vec_to_string(vec_scalar_mul(-0.5, '[2.0, 4.0]')); \ No newline at end of file From 5ffda7e97130c449455ee83f2afc80ee76d4b168 Mon Sep 17 00:00:00 2001 From: "Lei, HUANG" <6406592+v0y4g3r@users.noreply.github.com> Date: Mon, 16 Dec 2024 15:08:07 +0800 Subject: [PATCH 27/59] chore: gauge for flush compaction (#5156) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * add metrics * chore/bench-metrics: Add INFLIGHT_FLUSH_COUNT 
Metric to Flush Process • Introduced INFLIGHT_FLUSH_COUNT metric to track the number of ongoing flush operations. • Incremented INFLIGHT_FLUSH_COUNT in FlushScheduler to monitor active flushes. • Removed redundant increment of INFLIGHT_FLUSH_COUNT in RegionWorkerLoop to prevent double counting. * chore/bench-metrics: Add Metrics for Compaction and Flush Operations • Introduced INFLIGHT_COMPACTION_COUNT and INFLIGHT_FLUSH_COUNT metrics to track the number of ongoing compaction and flush operations. • Incremented INFLIGHT_COMPACTION_COUNT when scheduling remote and local compaction jobs, and decremented it upon completion. • Added INFLIGHT_FLUSH_COUNT increment and decrement logic around flush tasks to monitor active flush operations. • Removed redundant metric updates in worker.rs and handle_compaction.rs to streamline metric handling. * chore: add metrics for remote compaction jobs * chore: format * chore: also add dashbaord --- grafana/greptimedb.json | 1361 ++++++++--------- src/mito2/src/compaction.rs | 5 +- src/mito2/src/flush.rs | 8 +- src/mito2/src/metrics.rs | 13 + .../src/schedule/remote_job_scheduler.rs | 3 +- 5 files changed, 706 insertions(+), 684 deletions(-) diff --git a/grafana/greptimedb.json b/grafana/greptimedb.json index 86925d53420f..c526373874fa 100644 --- a/grafana/greptimedb.json +++ b/grafana/greptimedb.json @@ -2014,11 +2014,11 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "idelta(greptime_mito_compaction_stage_elapsed_count{stage=\"merge\"}[$__interval])", + "expr": "greptime_mito_inflight_compaction_count", "fullMetaSearch": false, "includeNullMetadata": false, "instant": false, - "legendFormat": "compaction-{{stage}}", + "legendFormat": "compaction-{{instance}}", "range": true, "refId": "A", "useBackend": false @@ -2030,12 +2030,12 @@ }, "disableTextWrap": false, "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le, type) (idelta(greptime_mito_flush_elapsed_bucket[$__interval])))", + "expr": "greptime_mito_inflight_flush_count", "fullMetaSearch": false, "hide": false, "includeNullMetadata": true, "instant": false, - "legendFormat": "flush-{{type}}", + "legendFormat": "flush-{{instance}}", "range": true, "refId": "B", "useBackend": false @@ -2707,752 +2707,751 @@ "y": 48 }, "id": 21, - "panels": [ + "panels": [], + "title": "Storage Components", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 49 + }, + "id": 18, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + 
}, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 49 + "disableTextWrap": false, + "editorMode": "code", + "expr": "rate(opendal_bytes_total_sum[$__rate_interval])", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{scheme}}-{{operation}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "OpenDAL traffic", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "id": 18, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "tooltip": { - "mode": "single", - "sort": "none" + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "rate(opendal_bytes_total_sum[$__rate_interval])", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{scheme}}-{{operation}}", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "OpenDAL traffic", - "type": "timeseries" + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 49 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - 
"axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 49 + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le, operation, schema) (rate(opendal_requests_duration_seconds_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "OpenDAL operation duration", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le, operation, schema) (rate(opendal_requests_duration_seconds_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "OpenDAL operation duration", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - "overrides": [] - }, - "gridPos": { - 
"h": 7, - "w": 12, - "x": 0, - "y": 56 - }, - "id": 43, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" }, - "tooltip": { - "mode": "single", - "sort": "none" + "thresholdsStyle": { + "mode": "off" } }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "greptime_object_store_lru_cache_bytes", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{instance}}-{{type}}", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Object store read cache size", - "type": "timeseries" + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 56 + }, + "id": 43, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 56 + "disableTextWrap": false, + "editorMode": "code", + "expr": "greptime_object_store_lru_cache_bytes", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Object store read cache size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "id": 44, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "tooltip": { - "mode": "single", - "sort": "none" + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + 
"spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance) / (sum(increase(greptime_object_store_lru_cache_miss[$__rate_interval])) by (instance) + sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{instance}}", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Object store read cache hit", - "type": "timeseries" + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 56 + }, + "id": 44, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 63 + "disableTextWrap": false, + "editorMode": "code", + "expr": "sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance) / (sum(increase(greptime_object_store_lru_cache_miss[$__rate_interval])) by (instance) + sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Object store read cache hit", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "id": 10, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "tooltip": { - "mode": "single", - "sort": "none" + "insertNulls": false, + "lineInterpolation": 
"linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "{{logstore}}-{{optype}}-p95", - "range": true, - "refId": "Log Store P95" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "{{logstore}}-{{optype}}-p99", - "range": true, - "refId": "Log Store P99" - } - ], - "title": "Log Store op duration seconds", - "type": "timeseries" + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 63 + }, + "id": 10, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 63 + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{logstore}}-{{optype}}-p95", + "range": true, + "refId": "Log Store P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - "id": 12, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{logstore}}-{{optype}}-p99", + "range": true, + "refId": "Log Store P99" + } + ], + "title": "Log Store op duration seconds", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": 
"${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "req-size-p95", - "range": true, - "refId": "A", - "useBackend": false + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "req-size-p99", - "range": true, - "refId": "C", - "useBackend": false + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "editorMode": "code", - "expr": "rate(raft_engine_write_size_sum[$__rate_interval])", - "hide": false, - "instant": false, - "legendFormat": "throughput", - "range": true, - "refId": "B" + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } - ], - "title": "WAL write size", - "type": "timeseries" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 63 + }, + "id": 12, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + 
"legendFormat": "req-size-p95", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 70 + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "req-size-p99", + "range": true, + "refId": "C", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - "id": 37, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "editorMode": "code", + "expr": "rate(raft_engine_write_size_sum[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "throughput", + "range": true, + "refId": "B" + } + ], + "title": "WAL write size", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false }, - "tooltip": { - "mode": "single", - "sort": "none" + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" } }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, type, node) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "{{node}}-{{type}}-p99", - "range": true, - "refId": "Log Store P95" - } - ], - "title": "WAL sync duration seconds", - "type": "timeseries" + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 70 + }, + "id": 37, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, type, node) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{node}}-{{type}}-p99", + "range": true, + "refId": "Log Store P95" } ], - "title": "Storage Components", - "type": "row" + "title": "WAL sync duration seconds", + "type": "timeseries" }, { "collapsed": false, @@ -4154,6 +4153,6 @@ "timezone": "", "title": "GreptimeDB", "uid": "e7097237-669b-4f8d-b751-13067afbfb68", - "version": 16, + "version": 17, "weekStart": "" } diff --git a/src/mito2/src/compaction.rs b/src/mito2/src/compaction.rs index 2b70f455d815..5236e0d616dc 
100644 --- a/src/mito2/src/compaction.rs +++ b/src/mito2/src/compaction.rs @@ -53,7 +53,7 @@ use crate::error::{ RegionTruncatedSnafu, RemoteCompactionSnafu, Result, TimeRangePredicateOverflowSnafu, TimeoutSnafu, }; -use crate::metrics::COMPACTION_STAGE_ELAPSED; +use crate::metrics::{COMPACTION_STAGE_ELAPSED, INFLIGHT_COMPACTION_COUNT}; use crate::read::projection::ProjectionMapper; use crate::read::scan_region::ScanInput; use crate::read::seq_scan::SeqScan; @@ -340,6 +340,7 @@ impl CompactionScheduler { "Scheduled remote compaction job {} for region {}", job_id, region_id ); + INFLIGHT_COMPACTION_COUNT.inc(); return Ok(()); } Err(e) => { @@ -384,7 +385,9 @@ impl CompactionScheduler { // Submit the compaction task. self.scheduler .schedule(Box::pin(async move { + INFLIGHT_COMPACTION_COUNT.inc(); local_compaction_task.run().await; + INFLIGHT_COMPACTION_COUNT.dec(); })) .map_err(|e| { error!(e; "Failed to submit compaction request for region {}", region_id); diff --git a/src/mito2/src/flush.rs b/src/mito2/src/flush.rs index 9606e92d04db..09f45ca4f724 100644 --- a/src/mito2/src/flush.rs +++ b/src/mito2/src/flush.rs @@ -32,7 +32,10 @@ use crate::error::{ Error, FlushRegionSnafu, RegionClosedSnafu, RegionDroppedSnafu, RegionTruncatedSnafu, Result, }; use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList}; -use crate::metrics::{FLUSH_BYTES_TOTAL, FLUSH_ELAPSED, FLUSH_ERRORS_TOTAL, FLUSH_REQUESTS_TOTAL}; +use crate::metrics::{ + FLUSH_BYTES_TOTAL, FLUSH_ELAPSED, FLUSH_ERRORS_TOTAL, FLUSH_REQUESTS_TOTAL, + INFLIGHT_FLUSH_COUNT, +}; use crate::read::Source; use crate::region::options::IndexOptions; use crate::region::version::{VersionControlData, VersionControlRef}; @@ -261,7 +264,9 @@ impl RegionFlushTask { let version_data = version_control.current(); Box::pin(async move { + INFLIGHT_FLUSH_COUNT.inc(); self.do_flush(version_data).await; + INFLIGHT_FLUSH_COUNT.dec(); }) } @@ -530,6 +535,7 @@ impl FlushScheduler { self.region_status.remove(®ion_id); return Err(e); } + flush_status.flushing = true; Ok(()) diff --git a/src/mito2/src/metrics.rs b/src/mito2/src/metrics.rs index e7c1c7272ef8..5a5d76da4c0b 100644 --- a/src/mito2/src/metrics.rs +++ b/src/mito2/src/metrics.rs @@ -75,6 +75,12 @@ lazy_static! { /// Histogram of flushed bytes. pub static ref FLUSH_BYTES_TOTAL: IntCounter = register_int_counter!("greptime_mito_flush_bytes_total", "mito flush bytes total").unwrap(); + /// Gauge for inflight compaction tasks. + pub static ref INFLIGHT_FLUSH_COUNT: IntGauge = + register_int_gauge!( + "greptime_mito_inflight_flush_count", + "inflight flush count", + ).unwrap(); // ------ End of flush related metrics @@ -124,6 +130,13 @@ lazy_static! { /// Counter of failed compaction task. pub static ref COMPACTION_FAILURE_COUNT: IntCounter = register_int_counter!("greptime_mito_compaction_failure_total", "mito compaction failure total").unwrap(); + + /// Gauge for inflight compaction tasks. + pub static ref INFLIGHT_COMPACTION_COUNT: IntGauge = + register_int_gauge!( + "greptime_mito_inflight_compaction_count", + "inflight compaction count", + ).unwrap(); // ------- End of compaction metrics. // Query metrics. 
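A note on the bookkeeping introduced above: INFLIGHT_FLUSH_COUNT and INFLIGHT_COMPACTION_COUNT are paired by hand, with inc() when a flush or compaction task starts (or when a remote compaction job is scheduled) and dec() when the task body finishes; for the remote path the dec() sits in the notifier shown in the next file. As a general Rust pattern, a dec() placed after an .await does not run if the future is dropped or the body panics, and an RAII drop guard keeps the pair balanced unconditionally. The sketch below is illustrative only and is not code from this patch: InflightGuard, the example metric name, and the lazy_static/prometheus/tokio scaffolding are assumptions made for the example.

use lazy_static::lazy_static;
use prometheus::{register_int_gauge, IntGauge};

lazy_static! {
    // Example gauge; the name is made up and does not exist in this patch.
    static ref INFLIGHT_TASK_COUNT: IntGauge = register_int_gauge!(
        "example_inflight_task_count",
        "number of tasks currently running (example metric)"
    )
    .unwrap();
}

/// RAII guard: increments the gauge on creation and decrements it on drop, so the
/// count stays balanced even if the task panics or its future is dropped mid-await.
struct InflightGuard(&'static IntGauge);

impl InflightGuard {
    fn new(gauge: &'static IntGauge) -> Self {
        gauge.inc();
        InflightGuard(gauge)
    }
}

impl Drop for InflightGuard {
    fn drop(&mut self) {
        self.0.dec();
    }
}

async fn run_task() {
    // Create the guard at the top of the task body; no matching dec() call is needed.
    let _guard = InflightGuard::new(&INFLIGHT_TASK_COUNT);
    // ... do the actual flush/compaction work here ...
}

#[tokio::main]
async fn main() {
    run_task().await;
    // Back to zero regardless of how `run_task` exited.
    assert_eq!(INFLIGHT_TASK_COUNT.get(), 0);
}

With a guard like this, the scheduling code only constructs the guard at the start of the task; the decrement happens in Drop no matter how the task exits, which is one way to keep such gauges from drifting.
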
diff --git a/src/mito2/src/schedule/remote_job_scheduler.rs b/src/mito2/src/schedule/remote_job_scheduler.rs index 8f51a774d50d..bfe31ef04177 100644 --- a/src/mito2/src/schedule/remote_job_scheduler.rs +++ b/src/mito2/src/schedule/remote_job_scheduler.rs @@ -27,7 +27,7 @@ use crate::compaction::compactor::CompactionRegion; use crate::compaction::picker::PickerOutput; use crate::error::{CompactRegionSnafu, Error, ParseJobIdSnafu, Result}; use crate::manifest::action::RegionEdit; -use crate::metrics::COMPACTION_FAILURE_COUNT; +use crate::metrics::{COMPACTION_FAILURE_COUNT, INFLIGHT_COMPACTION_COUNT}; use crate::request::{ BackgroundNotify, CompactionFailed, CompactionFinished, OutputTx, WorkerRequest, }; @@ -145,6 +145,7 @@ impl DefaultNotifier { #[async_trait::async_trait] impl Notifier for DefaultNotifier { async fn notify(&self, result: RemoteJobResult, waiters: Vec) { + INFLIGHT_COMPACTION_COUNT.dec(); match result { RemoteJobResult::CompactionJobResult(result) => { let notify = { From 54698325b6453f87dda5b46779f2ebb621e25ae2 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Mon, 16 Dec 2024 17:21:00 +0800 Subject: [PATCH 28/59] feat: introduce SKIPPING index (part 1) (#5155) * skip index parser Signed-off-by: Ruihang Xia * wip: sqlness Signed-off-by: Ruihang Xia * impl show create part Signed-off-by: Ruihang Xia * add empty line Signed-off-by: Ruihang Xia * change keyword to SKIPPING INDEX Signed-off-by: Ruihang Xia * rename local variables Signed-off-by: Ruihang Xia --------- Signed-off-by: Ruihang Xia --- src/api/src/v1/column_def.rs | 12 +- src/datatypes/src/error.rs | 9 +- src/datatypes/src/schema.rs | 7 +- src/datatypes/src/schema/column_schema.rs | 106 ++++++++++++++++++ src/operator/src/statement/ddl.rs | 3 +- src/query/src/error.rs | 11 +- src/query/src/sql/show_create_table.rs | 34 +++++- src/sql/src/error.rs | 9 +- src/sql/src/parsers/create_parser.rs | 105 ++++++++++++++++- src/sql/src/parsers/utils.rs | 13 ++- src/sql/src/statements.rs | 10 +- src/sql/src/statements/create.rs | 26 ++++- .../create/create_with_skip_index.result | 33 ++++++ .../common/create/create_with_skip_index.sql | 14 +++ 14 files changed, 371 insertions(+), 21 deletions(-) create mode 100644 tests/cases/standalone/common/create/create_with_skip_index.result create mode 100644 tests/cases/standalone/common/create/create_with_skip_index.sql diff --git a/src/api/src/v1/column_def.rs b/src/api/src/v1/column_def.rs index f026d3f6f97f..77dcd2c62190 100644 --- a/src/api/src/v1/column_def.rs +++ b/src/api/src/v1/column_def.rs @@ -16,7 +16,7 @@ use std::collections::HashMap; use datatypes::schema::{ ColumnDefaultConstraint, ColumnSchema, FulltextAnalyzer, FulltextOptions, COMMENT_KEY, - FULLTEXT_KEY, INVERTED_INDEX_KEY, + FULLTEXT_KEY, INVERTED_INDEX_KEY, SKIPPING_INDEX_KEY, }; use greptime_proto::v1::Analyzer; use snafu::ResultExt; @@ -29,6 +29,8 @@ use crate::v1::{ColumnDef, ColumnOptions, SemanticType}; const FULLTEXT_GRPC_KEY: &str = "fulltext"; /// Key used to store inverted index options in gRPC column options. const INVERTED_INDEX_GRPC_KEY: &str = "inverted_index"; +/// Key used to store skip index options in gRPC column options. +const SKIPPING_INDEX_GRPC_KEY: &str = "skipping_index"; /// Tries to construct a `ColumnSchema` from the given `ColumnDef`. 
pub fn try_as_column_schema(column_def: &ColumnDef) -> Result { @@ -60,6 +62,9 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result { if let Some(inverted_index) = options.options.get(INVERTED_INDEX_GRPC_KEY) { metadata.insert(INVERTED_INDEX_KEY.to_string(), inverted_index.clone()); } + if let Some(skipping_index) = options.options.get(SKIPPING_INDEX_GRPC_KEY) { + metadata.insert(SKIPPING_INDEX_KEY.to_string(), skipping_index.clone()); + } } ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable) @@ -84,6 +89,11 @@ pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option StatusCode::InvalidArguments, + | InvalidFulltextOption { .. } + | InvalidSkippingIndexOption { .. } => StatusCode::InvalidArguments, ValueExceedsPrecision { .. } | CastType { .. } diff --git a/src/datatypes/src/schema.rs b/src/datatypes/src/schema.rs index 2eaa0254fbee..c537a4608b42 100644 --- a/src/datatypes/src/schema.rs +++ b/src/datatypes/src/schema.rs @@ -28,10 +28,11 @@ use snafu::{ensure, ResultExt}; use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result}; use crate::prelude::ConcreteDataType; pub use crate::schema::column_schema::{ - ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, + ColumnSchema, FulltextAnalyzer, FulltextOptions, Metadata, SkippingIndexOptions, COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE, COLUMN_FULLTEXT_OPT_KEY_ANALYZER, - COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, - TIME_INDEX_KEY, + COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, + COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, FULLTEXT_KEY, INVERTED_INDEX_KEY, + SKIPPING_INDEX_KEY, TIME_INDEX_KEY, }; pub use crate::schema::constraint::ColumnDefaultConstraint; pub use crate::schema::raw::RawSchema; diff --git a/src/datatypes/src/schema/column_schema.rs b/src/datatypes/src/schema/column_schema.rs index c1e2df846918..aee9efd9625d 100644 --- a/src/datatypes/src/schema/column_schema.rs +++ b/src/datatypes/src/schema/column_schema.rs @@ -39,12 +39,20 @@ const DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint"; pub const FULLTEXT_KEY: &str = "greptime:fulltext"; /// Key used to store whether the column has inverted index in arrow field's metadata. pub const INVERTED_INDEX_KEY: &str = "greptime:inverted_index"; +/// Key used to store skip options in arrow field's metadata. +pub const SKIPPING_INDEX_KEY: &str = "greptime:skipping_index"; /// Keys used in fulltext options pub const COLUMN_FULLTEXT_CHANGE_OPT_KEY_ENABLE: &str = "enable"; pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer"; pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive"; +/// Keys used in SKIPPING index options +pub const COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY: &str = "granularity"; +pub const COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE: &str = "type"; + +pub const DEFAULT_GRANULARITY: u32 = 10240; + /// Schema of a column, used as an immutable struct. #[derive(Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct ColumnSchema { @@ -298,6 +306,34 @@ impl ColumnSchema { ); Ok(()) } + + /// Retrieves the skipping index options for the column. 
+ pub fn skipping_index_options(&self) -> Result> { + match self.metadata.get(SKIPPING_INDEX_KEY) { + None => Ok(None), + Some(json) => { + let options = + serde_json::from_str(json).context(error::DeserializeSnafu { json })?; + Ok(Some(options)) + } + } + } + + pub fn with_skipping_options(mut self, options: SkippingIndexOptions) -> Result { + self.metadata.insert( + SKIPPING_INDEX_KEY.to_string(), + serde_json::to_string(&options).context(error::SerializeSnafu)?, + ); + Ok(self) + } + + pub fn set_skipping_options(&mut self, options: &SkippingIndexOptions) -> Result<()> { + self.metadata.insert( + SKIPPING_INDEX_KEY.to_string(), + serde_json::to_string(options).context(error::SerializeSnafu)?, + ); + Ok(()) + } } /// Column extended type set in column schema's metadata. @@ -495,6 +531,76 @@ impl fmt::Display for FulltextAnalyzer { } } +/// Skipping options for a column. +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default, Visit, VisitMut)] +#[serde(rename_all = "kebab-case")] +pub struct SkippingIndexOptions { + /// The granularity of the skip index. + pub granularity: u32, + /// The type of the skip index. + #[serde(default)] + pub index_type: SkipIndexType, +} + +impl fmt::Display for SkippingIndexOptions { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "granularity={}", self.granularity)?; + write!(f, ", index_type={}", self.index_type)?; + Ok(()) + } +} + +/// Skip index types. +#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, Visit, VisitMut)] +pub enum SkipIndexType { + #[default] + BloomFilter, +} + +impl fmt::Display for SkipIndexType { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + SkipIndexType::BloomFilter => write!(f, "BLOOM"), + } + } +} + +impl TryFrom> for SkippingIndexOptions { + type Error = Error; + + fn try_from(options: HashMap) -> Result { + // Parse granularity with default value 1 + let granularity = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY) { + Some(value) => value.parse::().map_err(|_| { + error::InvalidSkippingIndexOptionSnafu { + msg: format!("Invalid granularity: {value}, expected: positive integer"), + } + .build() + })?, + None => DEFAULT_GRANULARITY, + }; + + // Parse index type with default value BloomFilter + let index_type = match options.get(COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE) { + Some(typ) => match typ.to_ascii_uppercase().as_str() { + "BLOOM" => SkipIndexType::BloomFilter, + _ => { + return error::InvalidSkippingIndexOptionSnafu { + msg: format!("Invalid index type: {typ}, expected: 'BLOOM'"), + } + .fail(); + } + }, + None => SkipIndexType::default(), + }; + + Ok(SkippingIndexOptions { + granularity, + index_type, + }) + } +} + #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/src/operator/src/statement/ddl.rs b/src/operator/src/statement/ddl.rs index ed96ca6f1833..eba88ee44d8a 100644 --- a/src/operator/src/statement/ddl.rs +++ b/src/operator/src/statement/ddl.rs @@ -271,7 +271,8 @@ impl StatementExecutor { table_info.ident.table_id = table_id; - let table_info = Arc::new(table_info.try_into().context(CreateTableInfoSnafu)?); + let table_info: Arc = + Arc::new(table_info.try_into().context(CreateTableInfoSnafu)?); create_table.table_id = Some(api::v1::TableId { id: table_id }); let table = DistTable::table(table_info); diff --git a/src/query/src/error.rs b/src/query/src/error.rs index 7e246d11c332..e696008cf546 100644 --- a/src/query/src/error.rs +++ b/src/query/src/error.rs @@ -316,6 +316,13 @@ pub enum Error { 
#[snafu(implicit)] location: Location, }, + + #[snafu(display("Failed to get SKIPPING index options"))] + GetSkippingIndexOptions { + source: datatypes::error::Error, + #[snafu(implicit)] + location: Location, + }, } impl ErrorExt for Error { @@ -366,7 +373,9 @@ impl ErrorExt for Error { MissingTableMutationHandler { .. } => StatusCode::Unexpected, GetRegionMetadata { .. } => StatusCode::RegionNotReady, TableReadOnly { .. } => StatusCode::Unsupported, - GetFulltextOptions { source, .. } => source.status_code(), + GetFulltextOptions { source, .. } | GetSkippingIndexOptions { source, .. } => { + source.status_code() + } } } diff --git a/src/query/src/sql/show_create_table.rs b/src/query/src/sql/show_create_table.rs index ca69dfc5e69e..b903509d2270 100644 --- a/src/query/src/sql/show_create_table.rs +++ b/src/query/src/sql/show_create_table.rs @@ -19,7 +19,8 @@ use std::collections::HashMap; use common_meta::SchemaOptions; use datatypes::schema::{ ColumnDefaultConstraint, ColumnSchema, SchemaRef, COLUMN_FULLTEXT_OPT_KEY_ANALYZER, - COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COMMENT_KEY, + COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, + COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, COMMENT_KEY, }; use snafu::ResultExt; use sql::ast::{ColumnDef, ColumnOption, ColumnOptionDef, Expr, Ident, ObjectName}; @@ -32,7 +33,8 @@ use table::metadata::{TableInfoRef, TableMeta}; use table::requests::{FILE_TABLE_META_KEY, TTL_KEY, WRITE_BUFFER_SIZE_KEY}; use crate::error::{ - ConvertSqlTypeSnafu, ConvertSqlValueSnafu, GetFulltextOptionsSnafu, Result, SqlSnafu, + ConvertSqlTypeSnafu, ConvertSqlValueSnafu, GetFulltextOptionsSnafu, + GetSkippingIndexOptionsSnafu, Result, SqlSnafu, }; /// Generates CREATE TABLE options from given table metadata and schema-level options. @@ -115,6 +117,23 @@ fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result StatusCode::Unsupported, PermissionDenied { .. } => StatusCode::PermissionDenied, - SetFulltextOption { .. } => StatusCode::Unexpected, + SetFulltextOption { .. } | SetSkippingIndexOption { .. 
} => StatusCode::Unexpected, } } diff --git a/src/sql/src/parsers/create_parser.rs b/src/sql/src/parsers/create_parser.rs index bb9aadadb703..f40ecb7b6efd 100644 --- a/src/sql/src/parsers/create_parser.rs +++ b/src/sql/src/parsers/create_parser.rs @@ -36,7 +36,9 @@ use crate::error::{ SyntaxSnafu, UnexpectedSnafu, UnsupportedSnafu, }; use crate::parser::{ParserContext, FLOW}; -use crate::parsers::utils::validate_column_fulltext_create_option; +use crate::parsers::utils::{ + validate_column_fulltext_create_option, validate_column_skipping_index_create_option, +}; use crate::statements::create::{ Column, ColumnExtensions, CreateDatabase, CreateExternalTable, CreateFlow, CreateTable, CreateTableLike, CreateView, Partitions, TableConstraint, VECTOR_OPT_DIM, @@ -53,6 +55,7 @@ pub const SINK: &str = "SINK"; pub const EXPIRE: &str = "EXPIRE"; pub const AFTER: &str = "AFTER"; pub const INVERTED: &str = "INVERTED"; +pub const SKIPPING: &str = "SKIPPING"; const DB_OPT_KEY_TTL: &str = "ttl"; @@ -701,6 +704,49 @@ impl<'a> ParserContext<'a> { column_extensions.vector_options = Some(options.into()); } + let mut is_index_declared = false; + + if let Token::Word(word) = parser.peek_token().token + && word.value.eq_ignore_ascii_case(SKIPPING) + { + parser.next_token(); + // Consume `INDEX` keyword + ensure!( + parser.parse_keyword(Keyword::INDEX), + InvalidColumnOptionSnafu { + name: column_name.to_string(), + msg: "expect INDEX after SKIPPING keyword", + } + ); + ensure!( + column_extensions.skipping_index_options.is_none(), + InvalidColumnOptionSnafu { + name: column_name.to_string(), + msg: "duplicated SKIPPING index option", + } + ); + + let options = parser + .parse_options(Keyword::WITH) + .context(error::SyntaxSnafu)? + .into_iter() + .map(parse_option_string) + .collect::>>()?; + + for key in options.keys() { + ensure!( + validate_column_skipping_index_create_option(key), + InvalidColumnOptionSnafu { + name: column_name.to_string(), + msg: format!("invalid SKIP option: {key}"), + } + ); + } + + column_extensions.skipping_index_options = Some(options.into()); + is_index_declared |= true; + } + if parser.parse_keyword(Keyword::FULLTEXT) { ensure!( column_extensions.fulltext_options.is_none(), @@ -738,10 +784,10 @@ impl<'a> ParserContext<'a> { } column_extensions.fulltext_options = Some(options.into()); - Ok(true) - } else { - Ok(false) + is_index_declared |= true; } + + Ok(is_index_declared) } fn parse_optional_table_constraint(&mut self) -> Result> { @@ -2103,6 +2149,57 @@ CREATE TABLE log ( .contains("invalid FULLTEXT option")); } + #[test] + fn test_parse_create_table_skip_options() { + let sql = r" +CREATE TABLE log ( + ts TIMESTAMP TIME INDEX, + msg INT SKIPPING INDEX WITH (granularity='8192', type='bloom'), +)"; + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + + if let Statement::CreateTable(c) = &result[0] { + c.columns.iter().for_each(|col| { + if col.name().value == "msg" { + assert!(!col + .extensions + .skipping_index_options + .as_ref() + .unwrap() + .is_empty()); + } + }); + } else { + panic!("should be create_table statement"); + } + + let sql = r" + CREATE TABLE log ( + ts TIMESTAMP TIME INDEX, + msg INT SKIPPING INDEX, + )"; + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + + if let Statement::CreateTable(c) = &result[0] { + c.columns.iter().for_each(|col| { + if col.name().value == "msg" { + assert!(col + .extensions + .skipping_index_options + 
.as_ref() + .unwrap() + .is_empty()); + } + }); + } else { + panic!("should be create_table statement"); + } + } + #[test] fn test_parse_create_view_with_columns() { let sql = "CREATE VIEW test () AS SELECT * FROM NUMBERS"; diff --git a/src/sql/src/parsers/utils.rs b/src/sql/src/parsers/utils.rs index ae5146d7ee7b..f7eefc4b9562 100644 --- a/src/sql/src/parsers/utils.rs +++ b/src/sql/src/parsers/utils.rs @@ -26,7 +26,10 @@ use datafusion_expr::{AggregateUDF, ScalarUDF, TableSource, WindowUDF}; use datafusion_sql::planner::{ContextProvider, SqlToRel}; use datafusion_sql::TableReference; use datatypes::arrow::datatypes::DataType; -use datatypes::schema::{COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE}; +use datatypes::schema::{ + COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, + COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, +}; use snafu::ResultExt; use crate::error::{ @@ -119,3 +122,11 @@ pub fn validate_column_fulltext_create_option(key: &str) -> bool { ] .contains(&key) } + +pub fn validate_column_skipping_index_create_option(key: &str) -> bool { + [ + COLUMN_SKIPPING_INDEX_OPT_KEY_GRANULARITY, + COLUMN_SKIPPING_INDEX_OPT_KEY_TYPE, + ] + .contains(&key) +} diff --git a/src/sql/src/statements.rs b/src/sql/src/statements.rs index 25cc3bf7e5be..00196ed5313b 100644 --- a/src/sql/src/statements.rs +++ b/src/sql/src/statements.rs @@ -58,7 +58,8 @@ use crate::error::{ self, ColumnTypeMismatchSnafu, ConvertSqlValueSnafu, ConvertToGrpcDataTypeSnafu, ConvertValueSnafu, DatatypeSnafu, InvalidCastSnafu, InvalidSqlValueSnafu, InvalidUnaryOpSnafu, ParseSqlValueSnafu, Result, SerializeColumnDefaultConstraintSnafu, SetFulltextOptionSnafu, - TimestampOverflowSnafu, UnsupportedDefaultValueSnafu, UnsupportedUnaryOpSnafu, + SetSkippingIndexOptionSnafu, TimestampOverflowSnafu, UnsupportedDefaultValueSnafu, + UnsupportedUnaryOpSnafu, }; use crate::statements::create::Column; pub use crate::statements::option_map::OptionMap; @@ -513,6 +514,12 @@ pub fn column_to_schema( .context(SetFulltextOptionSnafu)?; } + if let Some(options) = column.extensions.build_skipping_index_options()? { + column_schema = column_schema + .with_skipping_options(options) + .context(SetSkippingIndexOptionSnafu)?; + } + Ok(column_schema) } @@ -1519,6 +1526,7 @@ mod tests { .into(), ), vector_options: None, + skipping_index_options: None, }, }; diff --git a/src/sql/src/statements/create.rs b/src/sql/src/statements/create.rs index e4ea46572e5f..3ea265fb7f40 100644 --- a/src/sql/src/statements/create.rs +++ b/src/sql/src/statements/create.rs @@ -16,7 +16,7 @@ use std::collections::HashMap; use std::fmt::{Display, Formatter}; use common_catalog::consts::FILE_ENGINE; -use datatypes::schema::FulltextOptions; +use datatypes::schema::{FulltextOptions, SkippingIndexOptions}; use itertools::Itertools; use serde::Serialize; use snafu::ResultExt; @@ -24,7 +24,7 @@ use sqlparser::ast::{ColumnOptionDef, DataType, Expr, Query}; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::{ColumnDef, Ident, ObjectName, Value as SqlValue}; -use crate::error::{Result, SetFulltextOptionSnafu}; +use crate::error::{Result, SetFulltextOptionSnafu, SetSkippingIndexOptionSnafu}; use crate::statements::statement::Statement; use crate::statements::OptionMap; @@ -116,6 +116,8 @@ pub struct ColumnExtensions { pub fulltext_options: Option, /// Vector options. pub vector_options: Option, + /// Skipping index options. 
+ pub skipping_index_options: Option, } impl Column { @@ -158,6 +160,15 @@ impl Display for Column { write!(f, " FULLTEXT")?; } } + + if let Some(skipping_index_options) = &self.extensions.skipping_index_options { + if !skipping_index_options.is_empty() { + let options = skipping_index_options.kv_pairs(); + write!(f, " SKIPPING INDEX WITH({})", format_list_comma!(options))?; + } else { + write!(f, " SKIPPING INDEX")?; + } + } Ok(()) } } @@ -171,6 +182,17 @@ impl ColumnExtensions { let options: HashMap = options.clone().into_map(); Ok(Some(options.try_into().context(SetFulltextOptionSnafu)?)) } + + pub fn build_skipping_index_options(&self) -> Result> { + let Some(options) = self.skipping_index_options.as_ref() else { + return Ok(None); + }; + + let options: HashMap = options.clone().into_map(); + Ok(Some( + options.try_into().context(SetSkippingIndexOptionSnafu)?, + )) + } } #[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Serialize)] diff --git a/tests/cases/standalone/common/create/create_with_skip_index.result b/tests/cases/standalone/common/create/create_with_skip_index.result new file mode 100644 index 000000000000..00dd24dc6c9a --- /dev/null +++ b/tests/cases/standalone/common/create/create_with_skip_index.result @@ -0,0 +1,33 @@ +create table + skipping_table ( + ts timestamp time index, + id string skipping index, + `name` string skipping index + with + (granularity = 8192), + ); + +Affected Rows: 0 + +show +create table + skipping_table; + ++----------------+---------------------------------------------------------------------------------+ +| Table | Create Table | ++----------------+---------------------------------------------------------------------------------+ +| skipping_table | CREATE TABLE IF NOT EXISTS "skipping_table" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "id" STRING NULL SKIPPING INDEX WITH(granularity = '10240', type = 'BLOOM'), | +| | "name" STRING NULL SKIPPING INDEX WITH(granularity = '8192', type = 'BLOOM'), | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++----------------+---------------------------------------------------------------------------------+ + +drop table skipping_table; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/create/create_with_skip_index.sql b/tests/cases/standalone/common/create/create_with_skip_index.sql new file mode 100644 index 000000000000..0558936699a4 --- /dev/null +++ b/tests/cases/standalone/common/create/create_with_skip_index.sql @@ -0,0 +1,14 @@ +create table + skipping_table ( + ts timestamp time index, + id string skipping index, + `name` string skipping index + with + (granularity = 8192), + ); + +show +create table + skipping_table; + +drop table skipping_table; From 88f7075a2a09b0cf3abcff7407ba7737e73b4861 Mon Sep 17 00:00:00 2001 From: ZonaHe Date: Mon, 16 Dec 2024 18:56:41 +0800 Subject: [PATCH 29/59] feat: update dashboard to v0.7.3 (#5172) Co-authored-by: sunchanglong --- src/servers/dashboard/VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/servers/dashboard/VERSION b/src/servers/dashboard/VERSION index 2c0a9c7b7754..3d105a6fd8ce 100644 --- a/src/servers/dashboard/VERSION +++ b/src/servers/dashboard/VERSION @@ -1 +1 @@ -v0.7.2 +v0.7.3 From acedff030b7f827706e8cbe52123b94bf5167663 Mon Sep 17 00:00:00 2001 From: Ning Sun Date: Mon, 16 Dec 2024 19:47:18 +0800 Subject: [PATCH 30/59] chore: add nix-shell configure for a minimal environment for development (#5175) * chore: add nix-shell development environment * chore: add rust-analyzer * chore: 
use .envrc as a private file --- .gitignore | 6 +++++- shell.nix | 22 ++++++++++++++++++++++ 2 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 shell.nix diff --git a/.gitignore b/.gitignore index c1b0a8961845..5823287889aa 100644 --- a/.gitignore +++ b/.gitignore @@ -47,6 +47,10 @@ benchmarks/data venv/ -# Fuzz tests +# Fuzz tests tests-fuzz/artifacts/ tests-fuzz/corpus/ + +# Nix +.direnv +.envrc diff --git a/shell.nix b/shell.nix new file mode 100644 index 000000000000..b255fe845c76 --- /dev/null +++ b/shell.nix @@ -0,0 +1,22 @@ +let + nixpkgs = fetchTarball "https://github.com/NixOS/nixpkgs/tarball/nixos-unstable"; + fenix = import (fetchTarball "https://github.com/nix-community/fenix/archive/main.tar.gz") {}; + pkgs = import nixpkgs { config = {}; overlays = []; }; +in + +pkgs.mkShellNoCC { + packages = with pkgs; [ + git + clang + gcc + mold + libgit2 + protobuf + (fenix.fromToolchainFile { + dir = ./.; + }) + fenix.rust-analyzer + cargo-nextest + ]; + +} From 043d0bd7c23bd49f5ed614eedadb06090f974530 Mon Sep 17 00:00:00 2001 From: discord9 <55937128+discord9@users.noreply.github.com> Date: Mon, 16 Dec 2024 20:25:23 +0800 Subject: [PATCH 31/59] test: flow rebuild (#5162) * tests: rebuild flow * tests: more rebuild * tests: restart * chore: drop clean --- .../common/flow/flow_rebuild.result | 578 ++++++++++++++++++ .../standalone/common/flow/flow_rebuild.sql | 319 ++++++++++ 2 files changed, 897 insertions(+) create mode 100644 tests/cases/standalone/common/flow/flow_rebuild.result create mode 100644 tests/cases/standalone/common/flow/flow_rebuild.sql diff --git a/tests/cases/standalone/common/flow/flow_rebuild.result b/tests/cases/standalone/common/flow/flow_rebuild.result new file mode 100644 index 000000000000..67fd43a03288 --- /dev/null +++ b/tests/cases/standalone/common/flow/flow_rebuild.result @@ -0,0 +1,578 @@ +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP TABLE input_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +-- combination of different order of rebuild input table/flow +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN 
FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP TABLE input_basic; + +Affected Rows: 0 + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +-- this is expected to be the same as above("2") since the new `input_basic` table +-- have different table id, so is a different table +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +-- recreate flow so that it use new table id +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +Affected Rows: 3 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +-- 3 is also expected, since flow don't have persisent state +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 3 | ++----------+ + +DROP TABLE input_basic; + +Affected Rows: 0 + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +Affected Rows: 3 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN 
FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 3 | ++----------+ + +-- test again, this time with db restart +DROP TABLE input_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP TABLE input_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +-- combination of different order of rebuild input table/flow +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP TABLE input_basic; + +Affected Rows: 0 + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (26, "2021-07-01 00:00:02.000"); + +Affected Rows: 3 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +-- this is expected to be the same as above("2") since the new `input_basic` table +-- have different table id, so is a different table +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +-- recreate flow so that it use new table id 
+CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +Affected Rows: 3 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +-- 3 is also expected, since flow don't have persisent state +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 3 | ++----------+ + +DROP TABLE input_basic; + +Affected Rows: 0 + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 2 | ++----------+ + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +Affected Rows: 0 + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +Affected Rows: 3 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + ++-----------------------------------------+ +| ADMIN FLUSH_FLOW('test_wildcard_basic') | ++-----------------------------------------+ +| FLOW_FLUSHED | ++-----------------------------------------+ + +SELECT wildcard FROM out_basic; + ++----------+ +| wildcard | ++----------+ +| 3 | ++----------+ + +DROP FLOW test_wildcard_basic; + +Affected Rows: 0 + +DROP TABLE input_basic; + +Affected Rows: 0 + +DROP TABLE out_basic; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/flow/flow_rebuild.sql b/tests/cases/standalone/common/flow/flow_rebuild.sql new file mode 100644 index 000000000000..288d6f1f03b6 --- /dev/null +++ b/tests/cases/standalone/common/flow/flow_rebuild.sql @@ -0,0 +1,319 @@ +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED 
| +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP TABLE input_basic; + +DROP TABLE out_basic; + +DROP FLOW test_wildcard_basic; + +-- combination of different order of rebuild input table/flow + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP TABLE input_basic; + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +-- this is expected to be the same as above("2") since the new `input_basic` table +-- have different table id, so is a different table +SELECT wildcard FROM out_basic; + +DROP FLOW test_wildcard_basic; + +-- recreate flow so that it use new table id +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +-- 3 is also expected, since flow don't have persisent state +SELECT wildcard FROM out_basic; + +DROP TABLE input_basic; +DROP FLOW test_wildcard_basic; +DROP TABLE out_basic; + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP FLOW test_wildcard_basic; + +DROP TABLE out_basic; + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +-- test again, this time with db restart +DROP TABLE input_basic; +DROP TABLE out_basic; +DROP FLOW test_wildcard_basic; + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +-- SQLNESS REPLACE 
(ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP TABLE input_basic; + +DROP TABLE out_basic; + +DROP FLOW test_wildcard_basic; + +-- combination of different order of rebuild input table/flow + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP TABLE input_basic; + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (26, "2021-07-01 00:00:02.000"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +-- this is expected to be the same as above("2") since the new `input_basic` table +-- have different table id, so is a different table +SELECT wildcard FROM out_basic; + +DROP FLOW test_wildcard_basic; + +-- recreate flow so that it use new table id +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +-- 3 is also expected, since flow don't have persisent state +SELECT wildcard FROM out_basic; + +DROP TABLE input_basic; +DROP FLOW test_wildcard_basic; +DROP TABLE out_basic; + +CREATE TABLE input_basic ( + number INT, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, + PRIMARY KEY(number), + TIME INDEX(ts) +); + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"); + + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP FLOW test_wildcard_basic; + +DROP TABLE out_basic; + +CREATE FLOW test_wildcard_basic sink TO out_basic AS +SELECT + COUNT(*) as wildcard +FROM + input_basic; + +-- SQLNESS ARG restart=true +INSERT INTO + input_basic +VALUES + (23, "2021-07-01 00:00:01.000"), + (24, "2021-07-01 00:00:01.500"), + (25, "2021-07-01 00:00:01.700"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('test_wildcard_basic'); + +SELECT wildcard FROM out_basic; + +DROP FLOW test_wildcard_basic; + +DROP TABLE input_basic; + +DROP TABLE out_basic; From d0245473a916e968b2cba827b0f755e946845570 Mon Sep 17 00:00:00 2001 From: Weny Xu Date: Mon, 16 Dec 2024 22:01:40 +0800 Subject: [PATCH 32/59] fix: correct `set_region_role_state_gracefully` behaviors (#5171) * fix: 
reduce default max rows for fuzz testing * chore: remove Postgres setup from fuzz test workflow * chore(fuzz): increase resource limits for GreptimeDB cluster * chore(fuzz): increase resource limits for kafka * fix: correct `set_region_role_state_gracefully` behaviors * chore: remove Postgres setup from fuzz test workflow * chore(fuzz): redue resource limits for GreptimeDB & kafka --- .github/actions/setup-kafka-cluster/action.yml | 2 ++ .github/workflows/develop.yml | 4 ---- src/metric-engine/src/engine.rs | 8 +++++++- src/metric-engine/src/engine/catchup.rs | 3 +++ src/mito2/src/worker/handle_catchup.rs | 3 ++- tests-fuzz/src/utils.rs | 2 +- 6 files changed, 15 insertions(+), 7 deletions(-) diff --git a/.github/actions/setup-kafka-cluster/action.yml b/.github/actions/setup-kafka-cluster/action.yml index b8a73394235a..22b438995740 100644 --- a/.github/actions/setup-kafka-cluster/action.yml +++ b/.github/actions/setup-kafka-cluster/action.yml @@ -18,6 +18,8 @@ runs: --set controller.replicaCount=${{ inputs.controller-replicas }} \ --set controller.resources.requests.cpu=50m \ --set controller.resources.requests.memory=128Mi \ + --set controller.resources.limits.cpu=2000m \ + --set controller.resources.limits.memory=2Gi \ --set listeners.controller.protocol=PLAINTEXT \ --set listeners.client.protocol=PLAINTEXT \ --create-namespace \ diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index 6eccbe65b811..8939453f9dd9 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -323,8 +323,6 @@ jobs: uses: ./.github/actions/setup-kafka-cluster - name: Setup Etcd cluser uses: ./.github/actions/setup-etcd-cluster - - name: Setup Postgres cluser - uses: ./.github/actions/setup-postgres-cluster # Prepares for fuzz tests - uses: arduino/setup-protoc@v3 with: @@ -474,8 +472,6 @@ jobs: uses: ./.github/actions/setup-kafka-cluster - name: Setup Etcd cluser uses: ./.github/actions/setup-etcd-cluster - - name: Setup Postgres cluser - uses: ./.github/actions/setup-postgres-cluster # Prepares for fuzz tests - uses: arduino/setup-protoc@v3 with: diff --git a/src/metric-engine/src/engine.rs b/src/metric-engine/src/engine.rs index 86b64ddfae2a..15b94701139b 100644 --- a/src/metric-engine/src/engine.rs +++ b/src/metric-engine/src/engine.rs @@ -210,7 +210,6 @@ impl RegionEngine for MetricEngine { for x in [ utils::to_metadata_region_id(region_id), utils::to_data_region_id(region_id), - region_id, ] { if let Err(e) = self.inner.mito.set_region_role(x, role) && e.status_code() != StatusCode::RegionNotFound @@ -226,6 +225,13 @@ impl RegionEngine for MetricEngine { region_id: RegionId, region_role_state: SettableRegionRoleState, ) -> std::result::Result { + self.inner + .mito + .set_region_role_state_gracefully( + utils::to_metadata_region_id(region_id), + region_role_state, + ) + .await?; self.inner .mito .set_region_role_state_gracefully(region_id, region_role_state) diff --git a/src/metric-engine/src/engine/catchup.rs b/src/metric-engine/src/engine/catchup.rs index 4b1268c049b5..783e1f009c0a 100644 --- a/src/metric-engine/src/engine/catchup.rs +++ b/src/metric-engine/src/engine/catchup.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+use common_telemetry::debug; use snafu::ResultExt; use store_api::region_engine::RegionEngine; use store_api::region_request::{AffectedRows, RegionCatchupRequest, RegionRequest}; @@ -35,6 +36,7 @@ impl MetricEngineInner { } let metadata_region_id = utils::to_metadata_region_id(region_id); // TODO(weny): improve the catchup, we can read the wal entries only once. + debug!("Catchup metadata region {metadata_region_id}"); self.mito .handle_request( metadata_region_id, @@ -48,6 +50,7 @@ impl MetricEngineInner { .context(MitoCatchupOperationSnafu)?; let data_region_id = utils::to_data_region_id(region_id); + debug!("Catchup data region {data_region_id}"); self.mito .handle_request( data_region_id, diff --git a/src/mito2/src/worker/handle_catchup.rs b/src/mito2/src/worker/handle_catchup.rs index f0fd6b05503c..8992621dd724 100644 --- a/src/mito2/src/worker/handle_catchup.rs +++ b/src/mito2/src/worker/handle_catchup.rs @@ -16,8 +16,8 @@ use std::sync::Arc; -use common_telemetry::info; use common_telemetry::tracing::warn; +use common_telemetry::{debug, info}; use snafu::ensure; use store_api::logstore::LogStore; use store_api::region_engine::RegionRole; @@ -40,6 +40,7 @@ impl RegionWorkerLoop { }; if region.is_writable() { + debug!("Region {region_id} is writable, skip catchup"); return Ok(0); } // Note: Currently, We protect the split brain by ensuring the mutable table is empty. diff --git a/tests-fuzz/src/utils.rs b/tests-fuzz/src/utils.rs index 743347978924..84222f6d5a58 100644 --- a/tests-fuzz/src/utils.rs +++ b/tests-fuzz/src/utils.rs @@ -142,7 +142,7 @@ macro_rules! make_get_from_env_helper { make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_ALTER_ACTIONS, 256); make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_INSERT_ACTIONS, 8); -make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_ROWS, 2048); +make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_ROWS, 512); make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_TABLES, 64); make_get_from_env_helper!(GT_FUZZ_INPUT_MAX_COLUMNS, 32); From 8a5384697b7ae3f1ef1c988a27179ce0eee89a35 Mon Sep 17 00:00:00 2001 From: Yingwen Date: Tue, 17 Dec 2024 09:45:50 +0800 Subject: [PATCH 33/59] chore: add aquamarine to dep lists (#5181) --- Cargo.lock | 36 ++++++++++++++++++++++++++++++++++++ src/metric-engine/Cargo.toml | 1 + src/mito2/Cargo.toml | 1 + src/store-api/Cargo.toml | 1 + 4 files changed, 39 insertions(+) diff --git a/Cargo.lock b/Cargo.lock index df817dc201cd..1fa61c8c6f47 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -222,6 +222,20 @@ dependencies = [ "num-traits", ] +[[package]] +name = "aquamarine" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1da02abba9f9063d786eab1509833ebb2fac0f966862ca59439c76b9c566760" +dependencies = [ + "include_dir", + "itertools 0.10.5", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 1.0.109", +] + [[package]] name = "arbitrary" version = "1.3.2" @@ -5166,6 +5180,25 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb56e1aa765b4b4f3aadfab769793b7087bb03a4ea4920644a6d238e2df5b9ed" +[[package]] +name = "include_dir" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "923d117408f1e49d914f1a379a309cffe4f18c05cf4e3d12e613a15fc81bd0dd" +dependencies = [ + "include_dir_macros", +] + +[[package]] +name = "include_dir_macros" +version = "0.7.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7cab85a7ed0bd5f0e76d93846e0147172bed2e2d3f859bcc33a8d9699cad1a75" +dependencies = [ + 
"proc-macro2", + "quote", +] + [[package]] name = "index" version = "0.12.0" @@ -6438,6 +6471,7 @@ name = "metric-engine" version = "0.12.0" dependencies = [ "api", + "aquamarine", "async-trait", "base64 0.21.7", "common-base", @@ -6531,6 +6565,7 @@ name = "mito2" version = "0.12.0" dependencies = [ "api", + "aquamarine", "async-channel 1.9.0", "async-stream", "async-trait", @@ -11429,6 +11464,7 @@ name = "store-api" version = "0.12.0" dependencies = [ "api", + "aquamarine", "async-stream", "async-trait", "common-base", diff --git a/src/metric-engine/Cargo.toml b/src/metric-engine/Cargo.toml index 666ac09faa75..85aa371594e8 100644 --- a/src/metric-engine/Cargo.toml +++ b/src/metric-engine/Cargo.toml @@ -9,6 +9,7 @@ workspace = true [dependencies] api.workspace = true +aquamarine.workspace = true async-trait.workspace = true base64.workspace = true common-base.workspace = true diff --git a/src/mito2/Cargo.toml b/src/mito2/Cargo.toml index 181ba0f43407..56d480df5a24 100644 --- a/src/mito2/Cargo.toml +++ b/src/mito2/Cargo.toml @@ -13,6 +13,7 @@ workspace = true [dependencies] api.workspace = true +aquamarine.workspace = true async-channel = "1.9" async-stream.workspace = true async-trait = "0.1" diff --git a/src/store-api/Cargo.toml b/src/store-api/Cargo.toml index 1214ae3d4001..7c974661e315 100644 --- a/src/store-api/Cargo.toml +++ b/src/store-api/Cargo.toml @@ -9,6 +9,7 @@ workspace = true [dependencies] api.workspace = true +aquamarine.workspace = true async-trait.workspace = true common-base.workspace = true common-error.workspace = true From bfc777e6ac1d1389aeae480241e22f9ea2c4621f Mon Sep 17 00:00:00 2001 From: Yingwen Date: Tue, 17 Dec 2024 12:01:32 +0800 Subject: [PATCH 34/59] fix: deletion between two put may not work in `last_non_null` mode (#5168) * fix: deletion between rows with the same key may not work * test: add sqlness test case * chore: comments --- src/mito2/src/read/dedup.rs | 45 ++++++++++++- .../common/insert/merge_mode.result | 65 +++++++++++++++++++ .../standalone/common/insert/merge_mode.sql | 27 ++++++++ 3 files changed, 136 insertions(+), 1 deletion(-) diff --git a/src/mito2/src/read/dedup.rs b/src/mito2/src/read/dedup.rs index c77d0c3fabe1..a29781b94746 100644 --- a/src/mito2/src/read/dedup.rs +++ b/src/mito2/src/read/dedup.rs @@ -224,6 +224,12 @@ pub(crate) struct DedupMetrics { } /// Buffer to store fields in the last row to merge. +/// +/// Usage: +/// We should call `maybe_init()` to initialize the builder and then call `push_first_row()` +/// to push the first row of batches that the timestamp is the same as the row in this builder. +/// Finally we should call `merge_last_non_null()` to merge the last non-null fields and +/// return the merged batch. struct LastFieldsBuilder { /// Filter deleted rows. filter_deleted: bool, @@ -311,6 +317,16 @@ impl LastFieldsBuilder { return; } + // Both `maybe_init()` and `push_first_row()` can update the builder. If the delete + // op is not in the latest row, then we can't set the deletion flag in the `maybe_init()`. + // We must check the batch and update the deletion flag here to prevent + // the builder from merging non-null fields in rows that insert before the deleted row. + self.contains_deletion = batch.op_types().get_data(0).unwrap() == OpType::Delete as u8; + if self.contains_deletion { + // Deletes this row. 
+ return; + } + let fields = batch.fields(); for (idx, value) in self.last_fields.iter_mut().enumerate() { if value.is_null() && !fields[idx].data.is_null(0) { @@ -323,7 +339,8 @@ impl LastFieldsBuilder { } /// Merges last non-null fields, builds a new batch and resets the builder. - /// It may overwrites the last row of the `buffer`. + /// It may overwrites the last row of the `buffer`. The `buffer` is the batch + /// that initialized the builder. fn merge_last_non_null( &mut self, buffer: Batch, @@ -1082,6 +1099,32 @@ mod tests { ); } + #[test] + fn test_last_non_null_strategy_delete_middle() { + let input = [ + new_batch_multi_fields(b"k1", &[1], &[7], &[OpType::Put], &[(Some(11), None)]), + new_batch_multi_fields(b"k1", &[1], &[4], &[OpType::Delete], &[(None, None)]), + new_batch_multi_fields(b"k1", &[1], &[1], &[OpType::Put], &[(Some(12), Some(1))]), + new_batch_multi_fields(b"k1", &[2], &[8], &[OpType::Put], &[(Some(21), None)]), + new_batch_multi_fields(b"k1", &[2], &[5], &[OpType::Delete], &[(None, None)]), + new_batch_multi_fields(b"k1", &[2], &[2], &[OpType::Put], &[(Some(22), Some(2))]), + new_batch_multi_fields(b"k1", &[3], &[9], &[OpType::Put], &[(Some(31), None)]), + new_batch_multi_fields(b"k1", &[3], &[6], &[OpType::Delete], &[(None, None)]), + new_batch_multi_fields(b"k1", &[3], &[3], &[OpType::Put], &[(Some(32), Some(3))]), + ]; + + let mut strategy = LastNonNull::new(true); + check_dedup_strategy( + &input, + &mut strategy, + &[ + new_batch_multi_fields(b"k1", &[1], &[7], &[OpType::Put], &[(Some(11), None)]), + new_batch_multi_fields(b"k1", &[2], &[8], &[OpType::Put], &[(Some(21), None)]), + new_batch_multi_fields(b"k1", &[3], &[9], &[OpType::Put], &[(Some(31), None)]), + ], + ); + } + #[test] fn test_last_non_null_iter_on_batch() { let input = [new_batch_multi_fields( diff --git a/tests/cases/standalone/common/insert/merge_mode.result b/tests/cases/standalone/common/insert/merge_mode.result index f96ad2c8bce2..a98f6b6e38e7 100644 --- a/tests/cases/standalone/common/insert/merge_mode.result +++ b/tests/cases/standalone/common/insert/merge_mode.result @@ -92,6 +92,71 @@ DROP TABLE last_row_table; Affected Rows: 0 +CREATE TABLE IF NOT EXISTS `delete_between` ( + `time` TIMESTAMP(0) NOT NULL, + `code` STRING NULL, + `name` STRING NULL, + `status` TINYINT NULL, + TIME INDEX (`time`), + PRIMARY KEY (`code`) +) ENGINE=mito WITH( + merge_mode = 'last_non_null' +); + +Affected Rows: 0 + +INSERT INTO `delete_between` (`time`, `code`, `name`, `status`) VALUES ('2024-11-26 10:00:00', 'achn', '1.png', 0); + +Affected Rows: 1 + +INSERT INTO `delete_between` (`time`, `code`, `name`, `status`) VALUES ('2024-11-26 10:01:00', 'achn', '2.png', 0); + +Affected Rows: 1 + +INSERT INTO `delete_between` (`time`, `code`, `name`, `status`) VALUES ('2024-11-26 10:02:00', 'achn', '3.png', 1); + +Affected Rows: 1 + +SELECT * FROM `delete_between`; + ++---------------------+------+-------+--------+ +| time | code | name | status | ++---------------------+------+-------+--------+ +| 2024-11-26T10:00:00 | achn | 1.png | 0 | +| 2024-11-26T10:01:00 | achn | 2.png | 0 | +| 2024-11-26T10:02:00 | achn | 3.png | 1 | ++---------------------+------+-------+--------+ + +DELETE FROM `delete_between`; + +Affected Rows: 3 + +INSERT INTO `delete_between` (`time`, `code`, `name`) VALUES ('2024-11-26 10:00:00', 'achn', '1.png'); + +Affected Rows: 1 + +INSERT INTO `delete_between` (`time`, `code`, `name`) VALUES ('2024-11-26 10:01:00', 'achn', '2.png'); + +Affected Rows: 1 + +INSERT INTO `delete_between` (`time`, 
`code`, `name`) VALUES ('2024-11-26 10:02:00', 'achn', '3.png'); + +Affected Rows: 1 + +SELECT * FROM `delete_between`; + ++---------------------+------+-------+--------+ +| time | code | name | status | ++---------------------+------+-------+--------+ +| 2024-11-26T10:00:00 | achn | 1.png | | +| 2024-11-26T10:01:00 | achn | 2.png | | +| 2024-11-26T10:02:00 | achn | 3.png | | ++---------------------+------+-------+--------+ + +DROP TABLE `delete_between`; + +Affected Rows: 0 + create table if not exists invalid_merge_mode( host string, ts timestamp, diff --git a/tests/cases/standalone/common/insert/merge_mode.sql b/tests/cases/standalone/common/insert/merge_mode.sql index 967f94933311..9d22cc13d659 100644 --- a/tests/cases/standalone/common/insert/merge_mode.sql +++ b/tests/cases/standalone/common/insert/merge_mode.sql @@ -44,6 +44,33 @@ SELECT * from last_row_table ORDER BY host, ts; DROP TABLE last_row_table; +CREATE TABLE IF NOT EXISTS `delete_between` ( + `time` TIMESTAMP(0) NOT NULL, + `code` STRING NULL, + `name` STRING NULL, + `status` TINYINT NULL, + TIME INDEX (`time`), + PRIMARY KEY (`code`) +) ENGINE=mito WITH( + merge_mode = 'last_non_null' +); + +INSERT INTO `delete_between` (`time`, `code`, `name`, `status`) VALUES ('2024-11-26 10:00:00', 'achn', '1.png', 0); +INSERT INTO `delete_between` (`time`, `code`, `name`, `status`) VALUES ('2024-11-26 10:01:00', 'achn', '2.png', 0); +INSERT INTO `delete_between` (`time`, `code`, `name`, `status`) VALUES ('2024-11-26 10:02:00', 'achn', '3.png', 1); + +SELECT * FROM `delete_between`; + +DELETE FROM `delete_between`; + +INSERT INTO `delete_between` (`time`, `code`, `name`) VALUES ('2024-11-26 10:00:00', 'achn', '1.png'); +INSERT INTO `delete_between` (`time`, `code`, `name`) VALUES ('2024-11-26 10:01:00', 'achn', '2.png'); +INSERT INTO `delete_between` (`time`, `code`, `name`) VALUES ('2024-11-26 10:02:00', 'achn', '3.png'); + +SELECT * FROM `delete_between`; + +DROP TABLE `delete_between`; + create table if not exists invalid_merge_mode( host string, ts timestamp, From d821dc5a3eaa6af7cb7fa36939916dd4ac57a4a9 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Tue, 17 Dec 2024 14:55:42 +0800 Subject: [PATCH 35/59] feat(bloom-filter): add basic bloom filter creator (Part 1) (#5177) * feat(bloom-filter): add a simple bloom filter creator (Part 1) Signed-off-by: Zhenchi * fix: clippy Signed-off-by: Zhenchi * fix: header Signed-off-by: Zhenchi * docs: add format comment Signed-off-by: Zhenchi --------- Signed-off-by: Zhenchi --- Cargo.lock | 26 ++- src/index/Cargo.toml | 2 + src/index/src/bloom_filter.rs | 53 +++++ src/index/src/bloom_filter/creator.rs | 294 ++++++++++++++++++++++++++ src/index/src/bloom_filter/error.rs | 66 ++++++ src/index/src/lib.rs | 1 + 6 files changed, 439 insertions(+), 3 deletions(-) create mode 100644 src/index/src/bloom_filter.rs create mode 100644 src/index/src/bloom_filter/creator.rs create mode 100644 src/index/src/bloom_filter/error.rs diff --git a/Cargo.lock b/Cargo.lock index 1fa61c8c6f47..b86134a3edf0 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3834,6 +3834,18 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "95765f67b4b18863968b4a1bd5bb576f732b29a4a28c7cd84c09fa3e2875f33c" +[[package]] +name = "fastbloom" +version = "0.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b679f25009b51b71506296f95fb6362ba7d0151172fa7373a8d1611b8bc5d10f" +dependencies = [ + "getrandom", + "rand", + "siphasher 1.0.1", + "wide", +] + [[package]] name = 
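The `last_non_null` fix above rests on one ordering rule: when several rows share the same key and timestamp, null fields may only be back-filled from older rows until a delete is reached, because the delete shadows everything older. The toy model below is my own illustration of that rule (the `Op` enum and function names are invented, and it ignores `filter_deleted` handling and multi-column batches); it reproduces the expectation of the new `test_last_non_null_strategy_delete_middle` case that `Put(11)`, `Delete`, `Put(12)` merge to `11`, not `12`.

```rust
// Toy model of `last_non_null` merging for a single field column.
// Rows share the same primary key and timestamp, ordered newest first,
// mirroring how the dedup reader feeds rows into the strategy.
enum Op {
    Put(Option<i64>),
    Delete,
}

// Returns the merged field value; merging stops at the first delete so that
// puts older than the delete can no longer fill null fields.
fn merge_last_non_null(rows_newest_first: &[Op]) -> Option<i64> {
    let mut merged = None;
    for op in rows_newest_first {
        match op {
            // A delete shadows every older put; this is the check the fix
            // adds when pushing the first row of a batch into the builder.
            Op::Delete => break,
            // Only still-null fields are filled from older rows.
            Op::Put(v) => merged = merged.or(*v),
        }
    }
    merged
}

fn main() {
    let rows = [Op::Put(Some(11)), Op::Delete, Op::Put(Some(12))];
    assert_eq!(merge_last_non_null(&rows), Some(11));
}
```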
"fastdivide" version = "0.4.1" @@ -5213,6 +5225,7 @@ dependencies = [ "common-runtime", "common-telemetry", "common-test-util", + "fastbloom", "fst", "futures", "greptime-proto", @@ -5223,6 +5236,7 @@ dependencies = [ "regex", "regex-automata 0.4.8", "serde", + "serde_json", "snafu 0.8.5", "tantivy", "tantivy-jieba", @@ -8065,7 +8079,7 @@ version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096" dependencies = [ - "siphasher", + "siphasher 0.3.11", ] [[package]] @@ -8074,7 +8088,7 @@ version = "0.11.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90fcb95eef784c2ac79119d1dd819e162b5da872ce6f3c3abe1e8ca1c082f72b" dependencies = [ - "siphasher", + "siphasher 0.3.11", ] [[package]] @@ -10005,7 +10019,7 @@ dependencies = [ "once_cell", "radium", "rand", - "siphasher", + "siphasher 0.3.11", "unic-ucd-category", "volatile", "widestring", @@ -11016,6 +11030,12 @@ version = "0.3.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38b58827f4464d87d377d175e90bf58eb00fd8716ff0a62f80356b5e61555d0d" +[[package]] +name = "siphasher" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" + [[package]] name = "sketches-ddsketch" version = "0.2.2" diff --git a/src/index/Cargo.toml b/src/index/Cargo.toml index 772177147ae2..f46c64a17606 100644 --- a/src/index/Cargo.toml +++ b/src/index/Cargo.toml @@ -17,6 +17,7 @@ common-error.workspace = true common-macro.workspace = true common-runtime.workspace = true common-telemetry.workspace = true +fastbloom = "0.8" fst.workspace = true futures.workspace = true greptime-proto.workspace = true @@ -26,6 +27,7 @@ prost.workspace = true regex.workspace = true regex-automata.workspace = true serde.workspace = true +serde_json.workspace = true snafu.workspace = true tantivy = { version = "0.22", features = ["zstd-compression"] } tantivy-jieba = "0.11.0" diff --git a/src/index/src/bloom_filter.rs b/src/index/src/bloom_filter.rs new file mode 100644 index 000000000000..e68acc698a26 --- /dev/null +++ b/src/index/src/bloom_filter.rs @@ -0,0 +1,53 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use serde::{Deserialize, Serialize}; + +pub mod creator; +mod error; + +pub type Bytes = Vec; +pub type BytesRef<'a> = &'a [u8]; + +/// The Meta information of the bloom filter stored in the file. +#[derive(Debug, Default, Serialize, Deserialize)] +pub struct BloomFilterMeta { + /// The number of rows per segment. + pub rows_per_segment: usize, + + /// The number of segments. + pub seg_count: usize, + + /// The number of total rows. + pub row_count: usize, + + /// The size of the bloom filter excluding the meta information. + pub bloom_filter_segments_size: usize, + + /// Offset and size of bloom filters in the file. 
+ pub bloom_filter_segments: Vec, +} + +/// The location of the bloom filter segment in the file. +#[derive(Debug, Serialize, Deserialize)] +pub struct BloomFilterSegmentLocation { + /// The offset of the bloom filter segment in the file. + pub offset: u64, + + /// The size of the bloom filter segment in the file. + pub size: u64, + + /// The number of elements in the bloom filter segment. + pub elem_count: usize, +} diff --git a/src/index/src/bloom_filter/creator.rs b/src/index/src/bloom_filter/creator.rs new file mode 100644 index 000000000000..b3c95d3a7626 --- /dev/null +++ b/src/index/src/bloom_filter/creator.rs @@ -0,0 +1,294 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::HashSet; + +use fastbloom::BloomFilter; +use futures::{AsyncWrite, AsyncWriteExt}; +use snafu::ResultExt; + +use super::error::{IoSnafu, SerdeJsonSnafu}; +use crate::bloom_filter::error::Result; +use crate::bloom_filter::{BloomFilterMeta, BloomFilterSegmentLocation, Bytes}; + +/// The seed used for the Bloom filter. +const SEED: u128 = 42; + +/// The false positive rate of the Bloom filter. +const FALSE_POSITIVE_RATE: f64 = 0.01; + +/// `BloomFilterCreator` is responsible for creating and managing bloom filters +/// for a set of elements. It divides the rows into segments and creates +/// bloom filters for each segment. +/// +/// # Format +/// +/// The bloom filter creator writes the following format to the writer: +/// +/// ```text +/// +--------------------+--------------------+-----+----------------------+----------------------+ +/// | Bloom filter 0 | Bloom filter 1 | ... | BloomFilterMeta | Meta size | +/// +--------------------+--------------------+-----+----------------------+----------------------+ +/// |<- bytes (size 0) ->|<- bytes (size 1) ->| ... |<- json (meta size) ->|<- u32 LE (4 bytes) ->| +/// ``` +/// +pub struct BloomFilterCreator { + /// The number of rows per segment set by the user. + rows_per_segment: usize, + + /// Row count that added to the bloom filter so far. + accumulated_row_count: usize, + + /// A set of distinct elements in the current segment. + cur_seg_distinct_elems: HashSet, + + /// The memory usage of the current segment's distinct elements. + cur_seg_distinct_elems_mem_usage: usize, + + /// Storage for finalized Bloom filters. + finalized_bloom_filters: FinalizedBloomFilterStorage, +} + +impl BloomFilterCreator { + /// Creates a new `BloomFilterCreator` with the specified number of rows per segment. + /// + /// # PANICS + /// + /// `rows_per_segment` <= 0 + pub fn new(rows_per_segment: usize) -> Self { + assert!( + rows_per_segment > 0, + "rows_per_segment must be greater than 0" + ); + + Self { + rows_per_segment, + accumulated_row_count: 0, + cur_seg_distinct_elems: HashSet::default(), + cur_seg_distinct_elems_mem_usage: 0, + finalized_bloom_filters: FinalizedBloomFilterStorage::default(), + } + } + + /// Adds a row of elements to the bloom filter. 
If the number of accumulated rows + /// reaches `rows_per_segment`, it finalizes the current segment. + pub fn push_row_elems(&mut self, elems: impl IntoIterator) { + self.accumulated_row_count += 1; + for elem in elems.into_iter() { + let len = elem.len(); + let is_new = self.cur_seg_distinct_elems.insert(elem); + if is_new { + self.cur_seg_distinct_elems_mem_usage += len; + } + } + + if self.accumulated_row_count % self.rows_per_segment == 0 { + self.finalize_segment(); + } + } + + /// Finalizes any remaining segments and writes the bloom filters and metadata to the provided writer. + pub async fn finish(&mut self, mut writer: impl AsyncWrite + Unpin) -> Result<()> { + if !self.cur_seg_distinct_elems.is_empty() { + self.finalize_segment(); + } + + let mut meta = BloomFilterMeta { + rows_per_segment: self.rows_per_segment, + seg_count: self.finalized_bloom_filters.len(), + row_count: self.accumulated_row_count, + ..Default::default() + }; + + let mut buf = Vec::new(); + for segment in self.finalized_bloom_filters.drain() { + let slice = segment.bloom_filter.as_slice(); + buf.clear(); + write_u64_slice(&mut buf, slice); + writer.write_all(&buf).await.context(IoSnafu)?; + + let size = buf.len(); + meta.bloom_filter_segments.push(BloomFilterSegmentLocation { + offset: meta.bloom_filter_segments_size as _, + size: size as _, + elem_count: segment.element_count, + }); + meta.bloom_filter_segments_size += size; + } + + let meta_bytes = serde_json::to_vec(&meta).context(SerdeJsonSnafu)?; + writer.write_all(&meta_bytes).await.context(IoSnafu)?; + + let meta_size = meta_bytes.len() as u32; + writer + .write_all(&meta_size.to_le_bytes()) + .await + .context(IoSnafu)?; + writer.flush().await.unwrap(); + + Ok(()) + } + + /// Returns the memory usage of the creating bloom filter. + pub fn memory_usage(&self) -> usize { + self.cur_seg_distinct_elems_mem_usage + self.finalized_bloom_filters.memory_usage() + } + + fn finalize_segment(&mut self) { + let elem_count = self.cur_seg_distinct_elems.len(); + self.finalized_bloom_filters + .add(self.cur_seg_distinct_elems.drain(), elem_count); + self.cur_seg_distinct_elems_mem_usage = 0; + } +} + +/// Storage for finalized Bloom filters. +/// +/// TODO(zhongzc): Add support for storing intermediate bloom filters on disk to control memory usage. +#[derive(Debug, Default)] +struct FinalizedBloomFilterStorage { + /// Bloom filters that are stored in memory. + in_memory: Vec, +} + +impl FinalizedBloomFilterStorage { + fn memory_usage(&self) -> usize { + self.in_memory.iter().map(|s| s.size).sum() + } + + /// Adds a new finalized Bloom filter to the storage. + /// + /// TODO(zhongzc): Add support for flushing to disk. + fn add(&mut self, elems: impl IntoIterator, elem_count: usize) { + let mut bf = BloomFilter::with_false_pos(FALSE_POSITIVE_RATE) + .seed(&SEED) + .expected_items(elem_count); + for elem in elems.into_iter() { + bf.insert(&elem); + } + + let cbf = FinalizedBloomFilterSegment::new(bf, elem_count); + self.in_memory.push(cbf); + } + + fn len(&self) -> usize { + self.in_memory.len() + } + + fn drain(&mut self) -> impl Iterator + '_ { + self.in_memory.drain(..) + } +} + +/// A finalized Bloom filter segment. +#[derive(Debug)] +struct FinalizedBloomFilterSegment { + /// The underlying Bloom filter. + bloom_filter: BloomFilter, + + /// The number of elements in the Bloom filter. + element_count: usize, + + /// The occupied memory size of the Bloom filter. 
+ size: usize, +} + +impl FinalizedBloomFilterSegment { + fn new(bloom_filter: BloomFilter, elem_count: usize) -> Self { + let memory_usage = std::mem::size_of_val(bloom_filter.as_slice()); + Self { + bloom_filter, + element_count: elem_count, + size: memory_usage, + } + } +} + +/// Writes a slice of `u64` to the buffer in little-endian order. +fn write_u64_slice(buf: &mut Vec, slice: &[u64]) { + buf.reserve(std::mem::size_of_val(slice)); + for &x in slice { + buf.extend_from_slice(&x.to_le_bytes()); + } +} + +#[cfg(test)] +mod tests { + use futures::io::Cursor; + + use super::*; + + fn u64_vec_from_bytes(bytes: &[u8]) -> Vec { + bytes + .chunks_exact(std::mem::size_of::()) + .map(|chunk| u64::from_le_bytes(chunk.try_into().unwrap())) + .collect() + } + + #[tokio::test] + async fn test_bloom_filter_creator() { + let mut writer = Cursor::new(Vec::new()); + let mut creator = BloomFilterCreator::new(2); + + creator.push_row_elems(vec![b"a".to_vec(), b"b".to_vec()]); + assert!(creator.cur_seg_distinct_elems_mem_usage > 0); + assert!(creator.memory_usage() > 0); + + creator.push_row_elems(vec![b"c".to_vec(), b"d".to_vec()]); + // Finalize the first segment + assert!(creator.cur_seg_distinct_elems_mem_usage == 0); + assert!(creator.memory_usage() > 0); + + creator.push_row_elems(vec![b"e".to_vec(), b"f".to_vec()]); + assert!(creator.cur_seg_distinct_elems_mem_usage > 0); + assert!(creator.memory_usage() > 0); + + creator.finish(&mut writer).await.unwrap(); + + let bytes = writer.into_inner(); + let total_size = bytes.len(); + let meta_size_offset = total_size - 4; + let meta_size = u32::from_le_bytes((&bytes[meta_size_offset..]).try_into().unwrap()); + + let meta_bytes = &bytes[total_size - meta_size as usize - 4..total_size - 4]; + let meta: BloomFilterMeta = serde_json::from_slice(meta_bytes).unwrap(); + + assert_eq!(meta.rows_per_segment, 2); + assert_eq!(meta.seg_count, 2); + assert_eq!(meta.row_count, 3); + assert_eq!( + meta.bloom_filter_segments_size + meta_bytes.len() + 4, + total_size + ); + + let mut bfs = Vec::new(); + for segment in meta.bloom_filter_segments { + let bloom_filter_bytes = + &bytes[segment.offset as usize..(segment.offset + segment.size) as usize]; + let v = u64_vec_from_bytes(bloom_filter_bytes); + let bloom_filter = BloomFilter::from_vec(v) + .seed(&SEED) + .expected_items(segment.elem_count); + bfs.push(bloom_filter); + } + + assert_eq!(bfs.len(), 2); + assert!(bfs[0].contains(&b"a")); + assert!(bfs[0].contains(&b"b")); + assert!(bfs[0].contains(&b"c")); + assert!(bfs[0].contains(&b"d")); + assert!(bfs[1].contains(&b"e")); + assert!(bfs[1].contains(&b"f")); + } +} diff --git a/src/index/src/bloom_filter/error.rs b/src/index/src/bloom_filter/error.rs new file mode 100644 index 000000000000..8e95dc52255e --- /dev/null +++ b/src/index/src/bloom_filter/error.rs @@ -0,0 +1,66 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
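The layout documented on `BloomFilterCreator` (concatenated bloom filter segments, then a JSON `BloomFilterMeta`, then a little-endian `u32` giving the meta size) implies a footer-first read path, which the unit test above walks through by hand. The helper below is a sketch of that path under the same layout assumption; the function is mine and not part of this patch.

```rust
/// Splits a finished blob into (bloom filter segment bytes, JSON meta bytes),
/// assuming the layout written by `finish`: [segments][meta JSON][u32 LE meta size].
fn split_bloom_filter_blob(bytes: &[u8]) -> Option<(&[u8], &[u8])> {
    // The trailing 4 bytes hold the length of the JSON-encoded meta.
    let (rest, size_bytes) = bytes.split_at(bytes.len().checked_sub(4)?);
    let meta_size = u32::from_le_bytes(size_bytes.try_into().ok()?) as usize;
    // The meta sits directly before the size field; everything earlier is the
    // concatenated per-segment bloom filter data addressed by the recorded
    // `BloomFilterSegmentLocation { offset, size, .. }` entries.
    let (segments, meta_json) = rest.split_at(rest.len().checked_sub(meta_size)?);
    Some((segments, meta_json))
}
```

From there a reader would deserialize the meta and slice each segment out of the first half using its offset and size, exactly as the test does.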
+ +use std::any::Any; + +use common_error::ext::{BoxedError, ErrorExt}; +use common_error::status_code::StatusCode; +use common_macro::stack_trace_debug; +use snafu::{Location, Snafu}; + +#[derive(Snafu)] +#[snafu(visibility(pub))] +#[stack_trace_debug] +pub enum Error { + #[snafu(display("IO error"))] + Io { + #[snafu(source)] + error: std::io::Error, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Failed to serde json"))] + SerdeJson { + #[snafu(source)] + error: serde_json::error::Error, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("External error"))] + External { + source: BoxedError, + #[snafu(implicit)] + location: Location, + }, +} + +impl ErrorExt for Error { + fn status_code(&self) -> StatusCode { + use Error::*; + + match self { + Io { .. } | Self::SerdeJson { .. } => StatusCode::Unexpected, + + External { source, .. } => source.status_code(), + } + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +pub type Result = std::result::Result; diff --git a/src/index/src/lib.rs b/src/index/src/lib.rs index 5e2e41166863..e52a93138f68 100644 --- a/src/index/src/lib.rs +++ b/src/index/src/lib.rs @@ -15,5 +15,6 @@ #![feature(iter_partition_in_place)] #![feature(assert_matches)] +pub mod bloom_filter; pub mod fulltext_index; pub mod inverted_index; From 421088a868821245119703614252dff1e9b33158 Mon Sep 17 00:00:00 2001 From: discord9 <55937128+discord9@users.noreply.github.com> Date: Tue, 17 Dec 2024 15:00:02 +0800 Subject: [PATCH 36/59] test: sqlness upgrade compatibility tests (#5126) * feat: simple version switch * chore: remove debug print * chore: add common folder * tests: add drop table * feat: pull versioned binary * chore: don't use native-tls * chore: rm outdated docs * chore: new line * fix: save old bin dir * fix: switch version restart all node * feat: use etcd * fix: wait for election * fix: normal sqlness * refactor: hashmap for bin dir * test: past 3 major version compat crate table * refactor: allow using without setup etcd --- Cargo.lock | 70 ++++- src/common/meta/src/kv_backend/etcd.rs | 2 + tests/conf/metasrv-test.toml.template | 10 + tests/runner/Cargo.toml | 12 +- tests/runner/src/env.rs | 190 ++++++++++-- tests/runner/src/main.rs | 30 ++ tests/runner/src/util.rs | 283 +++++++++++++++++- tests/upgrade-compat/distributed/common | 1 + .../common/table_engine_0_10_2.result | 137 +++++++++ .../standalone/common/table_engine_0_10_2.sql | 60 ++++ .../common/table_engine_v0_11_0.result | 137 +++++++++ .../common/table_engine_v0_11_0.sql | 60 ++++ .../common/table_engine_v0_9_5.result | 137 +++++++++ .../standalone/common/table_engine_v0_9_5.sql | 60 ++++ .../standalone/common/test_simple.result | 47 +++ .../standalone/common/test_simple.sql | 22 ++ .../standalone/common/test_ttl.result | 153 ++++++++++ .../standalone/common/test_ttl.sql | 42 +++ 18 files changed, 1420 insertions(+), 33 deletions(-) create mode 120000 tests/upgrade-compat/distributed/common create mode 100644 tests/upgrade-compat/standalone/common/table_engine_0_10_2.result create mode 100644 tests/upgrade-compat/standalone/common/table_engine_0_10_2.sql create mode 100644 tests/upgrade-compat/standalone/common/table_engine_v0_11_0.result create mode 100644 tests/upgrade-compat/standalone/common/table_engine_v0_11_0.sql create mode 100644 tests/upgrade-compat/standalone/common/table_engine_v0_9_5.result create mode 100644 tests/upgrade-compat/standalone/common/table_engine_v0_9_5.sql create mode 100644 tests/upgrade-compat/standalone/common/test_simple.result create 
mode 100644 tests/upgrade-compat/standalone/common/test_simple.sql create mode 100644 tests/upgrade-compat/standalone/common/test_ttl.result create mode 100644 tests/upgrade-compat/standalone/common/test_ttl.sql diff --git a/Cargo.lock b/Cargo.lock index b86134a3edf0..ea2931f09808 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6026,6 +6026,18 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" +[[package]] +name = "local-ip-address" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3669cf5561f8d27e8fc84cc15e58350e70f557d4d65f70e3154e54cd2f8e1782" +dependencies = [ + "libc", + "neli", + "thiserror 1.0.64", + "windows-sys 0.59.0", +] + [[package]] name = "lock_api" version = "0.4.12" @@ -6992,6 +7004,31 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27b02d87554356db9e9a873add8782d4ea6e3e58ea071a9adb9a2e8ddb884a8b" +[[package]] +name = "neli" +version = "0.6.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1100229e06604150b3becd61a4965d5c70f3be1759544ea7274166f4be41ef43" +dependencies = [ + "byteorder", + "libc", + "log", + "neli-proc-macros", +] + +[[package]] +name = "neli-proc-macros" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c168194d373b1e134786274020dae7fc5513d565ea2ebb9bc9ff17ffb69106d4" +dependencies = [ + "either", + "proc-macro2", + "quote", + "serde", + "syn 1.0.109", +] + [[package]] name = "new_debug_unreachable" version = "1.0.6" @@ -9380,9 +9417,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.8" +version = "0.12.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f713147fbe92361e52392c73b8c9e48c04c6625bce969ef54dc901e58e042a7b" +checksum = "a77c62af46e79de0a562e1a9849205ffcb7fc1238876e9bd743357570e04046f" dependencies = [ "base64 0.22.1", "bytes", @@ -11280,14 +11317,21 @@ dependencies = [ "common-recordbatch", "common-time", "datatypes", + "flate2", + "hex", + "local-ip-address", "mysql", + "reqwest", "serde", "serde_json", + "sha2", "sqlness", + "tar", "tempfile", "tinytemplate", "tokio", "tokio-postgres", + "tokio-stream", ] [[package]] @@ -12043,6 +12087,17 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" +[[package]] +name = "tar" +version = "0.4.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c65998313f8e17d0d553d28f91a0df93e4dbbbf770279c7bc21ca0f09ea1a1f6" +dependencies = [ + "filetime", + "libc", + "xattr", +] + [[package]] name = "target-lexicon" version = "0.12.16" @@ -14168,6 +14223,17 @@ dependencies = [ "zeroize", ] +[[package]] +name = "xattr" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8da84f1a25939b27f6820d92aed108f83ff920fdf11a7b19366c27c4cda81d4f" +dependencies = [ + "libc", + "linux-raw-sys", + "rustix", +] + [[package]] name = "xml-rs" version = "0.8.22" diff --git a/src/common/meta/src/kv_backend/etcd.rs b/src/common/meta/src/kv_backend/etcd.rs index 1cdd45bc5c13..a787940b6df0 100644 --- a/src/common/meta/src/kv_backend/etcd.rs +++ b/src/common/meta/src/kv_backend/etcd.rs @@ -15,6 +15,7 @@ use std::any::Any; use std::sync::Arc; +use common_telemetry::info; use etcd_client::{ Client, DeleteOptions, GetOptions, 
PutOptions, Txn, TxnOp, TxnOpResponse, TxnResponse, }; @@ -55,6 +56,7 @@ impl EtcdStore { } pub fn with_etcd_client(client: Client, max_txn_ops: usize) -> KvBackendRef { + info!("Connected to etcd"); Arc::new(Self { client, max_txn_ops, diff --git a/tests/conf/metasrv-test.toml.template b/tests/conf/metasrv-test.toml.template index 8d27aad3c4b2..1196403a2648 100644 --- a/tests/conf/metasrv-test.toml.template +++ b/tests/conf/metasrv-test.toml.template @@ -1,4 +1,14 @@ flush_stats_factor = 1 +{{ if use_etcd }} +## Store server address default to etcd store. +store_addrs = [{store_addrs | unescaped}] + +## Store data in memory. +use_memory_store = false + +## The datastore for meta server. +backend = "EtcdStore" +{{ endif }} [wal] {{ if is_raft_engine }} provider = "raft_engine" diff --git a/tests/runner/Cargo.toml b/tests/runner/Cargo.toml index 71312c39dea3..3ea403e862e0 100644 --- a/tests/runner/Cargo.toml +++ b/tests/runner/Cargo.toml @@ -16,12 +16,18 @@ common-query.workspace = true common-recordbatch.workspace = true common-time.workspace = true datatypes = { workspace = true } +flate2 = "1.0" +hex = "0.4" +local-ip-address = "0.6" mysql = { version = "25.0.1", default-features = false, features = ["minimal", "rustls-tls"] } +reqwest = { version = "0.12", default-features = false, features = ["rustls-tls"] } serde.workspace = true serde_json.workspace = true -tokio-postgres = { workspace = true } -# sqlness 0.6.0 have a bug causing `cargo sqlness` to fail(see https://github.com/CeresDB/sqlness/issues/68) which is fixed in 0.6.1 -sqlness = "0.6.1" +sha2 = "0.10" +sqlness = "0.6.1" # sqlness 0.6.0 have a bug causing `cargo sqlness` to fail(see https://github.com/CeresDB/sqlness/issues/68) which is fixed in 0.6.1 +tar = "0.4" tempfile.workspace = true tinytemplate = "1.2" tokio.workspace = true +tokio-postgres = { workspace = true } +tokio-stream.workspace = true diff --git a/tests/runner/src/env.rs b/tests/runner/src/env.rs index bb5d74a26702..81bbe2fb0b07 100644 --- a/tests/runner/src/env.rs +++ b/tests/runner/src/env.rs @@ -13,6 +13,7 @@ // limitations under the License. use std::borrow::Cow; +use std::collections::HashMap; use std::fmt::Display; use std::fs::OpenOptions; use std::io; @@ -45,6 +46,7 @@ use tokio::sync::Mutex as TokioMutex; use tokio_postgres::{Client as PgClient, SimpleQueryMessage as PgRow}; use crate::protocol_interceptor::{MYSQL, PROTOCOL_KEY}; +use crate::util::{get_workspace_root, maybe_pull_binary, PROGRAM}; use crate::{util, ServerAddr}; const METASRV_ADDR: &str = "127.0.0.1:29302"; @@ -64,6 +66,12 @@ pub enum WalConfig { }, } +#[derive(Clone)] +pub struct StoreConfig { + pub store_addrs: Vec, + pub setup_etcd: bool, +} + #[derive(Clone)] pub struct Env { sqlness_home: PathBuf, @@ -74,6 +82,12 @@ pub struct Env { /// When running in CI, this is expected to be set. /// If not set, this runner will build the GreptimeDB binary itself when needed, and set this field by then. bins_dir: Arc>>, + /// The path to the directory that contains the old pre-built GreptimeDB binaries. + versioned_bins_dirs: Arc>>, + /// Pull different versions of GreptimeDB on need. 
+ pull_version_on_need: bool, + /// Store address for metasrv metadata + store_config: StoreConfig, } #[async_trait] @@ -100,13 +114,21 @@ impl Env { data_home: PathBuf, server_addrs: ServerAddr, wal: WalConfig, + pull_version_on_need: bool, bins_dir: Option, + store_config: StoreConfig, ) -> Self { Self { sqlness_home: data_home, server_addrs, wal, - bins_dir: Arc::new(Mutex::new(bins_dir)), + pull_version_on_need, + bins_dir: Arc::new(Mutex::new(bins_dir.clone())), + versioned_bins_dirs: Arc::new(Mutex::new(HashMap::from_iter([( + "latest".to_string(), + bins_dir.clone().unwrap_or(util::get_binary_dir("debug")), + )]))), + store_config, } } @@ -117,7 +139,7 @@ impl Env { self.build_db(); self.setup_wal(); - let db_ctx = GreptimeDBContext::new(self.wal.clone()); + let db_ctx = GreptimeDBContext::new(self.wal.clone(), self.store_config.clone()); let server_process = self.start_server("standalone", &db_ctx, true).await; @@ -136,8 +158,9 @@ impl Env { } else { self.build_db(); self.setup_wal(); + self.setup_etcd(); - let db_ctx = GreptimeDBContext::new(self.wal.clone()); + let db_ctx = GreptimeDBContext::new(self.wal.clone(), self.store_config.clone()); // start a distributed GreptimeDB let meta_server = self.start_server("metasrv", &db_ctx, true).await; @@ -152,12 +175,12 @@ impl Env { let mut greptimedb = self.connect_db(&Default::default()).await; - greptimedb.metasrv_process = Some(meta_server); + greptimedb.metasrv_process = Some(meta_server).into(); greptimedb.server_processes = Some(Arc::new(Mutex::new(vec![ datanode_1, datanode_2, datanode_3, ]))); - greptimedb.frontend_process = Some(frontend); - greptimedb.flownode_process = Some(flownode); + greptimedb.frontend_process = Some(frontend).into(); + greptimedb.flownode_process = Some(flownode).into(); greptimedb.is_standalone = false; greptimedb.ctx = db_ctx; @@ -237,13 +260,14 @@ impl Env { pg_client: TokioMutex::new(pg_client), mysql_client: TokioMutex::new(mysql_client), server_processes: None, - metasrv_process: None, - frontend_process: None, - flownode_process: None, + metasrv_process: None.into(), + frontend_process: None.into(), + flownode_process: None.into(), ctx: GreptimeDBContext { time: 0, datanode_id: Default::default(), wal: self.wal.clone(), + store_config: self.store_config.clone(), }, is_standalone: false, env: self.clone(), @@ -341,7 +365,7 @@ impl Env { ) } "metasrv" => { - let args = vec![ + let mut args = vec![ DEFAULT_LOG_LEVEL.to_string(), subcommand.to_string(), "start".to_string(), @@ -349,8 +373,6 @@ impl Env { "127.0.0.1:29302".to_string(), "--server-addr".to_string(), "127.0.0.1:29302".to_string(), - "--backend".to_string(), - "memory-store".to_string(), "--enable-region-failover".to_string(), "false".to_string(), "--http-addr=127.0.0.1:29502".to_string(), @@ -361,6 +383,9 @@ impl Env { "-c".to_string(), self.generate_config_file(subcommand, db_ctx), ]; + if db_ctx.store_config().store_addrs.is_empty() { + args.extend(vec!["--backend".to_string(), "memory-store".to_string()]) + } (args, vec![METASRV_ADDR.to_string()]) } _ => panic!("Unexpected subcommand: {subcommand}"), @@ -375,23 +400,20 @@ impl Env { } } - #[cfg(not(windows))] - let program = "./greptime"; - #[cfg(windows)] - let program = "greptime.exe"; + let program = PROGRAM; let bins_dir = self.bins_dir.lock().unwrap().clone().expect( "GreptimeDB binary is not available. Please pass in the path to the directory that contains the pre-built GreptimeDB binary. 
Or you may call `self.build_db()` beforehand.", ); let mut process = Command::new(program) - .current_dir(bins_dir) + .current_dir(bins_dir.clone()) .env("TZ", "UTC") .args(args) .stdout(stdout_file) .spawn() .unwrap_or_else(|error| { - panic!("Failed to start the DB with subcommand {subcommand},Error: {error}") + panic!("Failed to start the DB with subcommand {subcommand},Error: {error}, path: {:?}", bins_dir.join(program)); }); for check_ip_addr in &check_ip_addrs { @@ -452,7 +474,7 @@ impl Env { } /// stop and restart the server process - async fn restart_server(&self, db: &GreptimeDB) { + async fn restart_server(&self, db: &GreptimeDB, is_full_restart: bool) { { if let Some(server_process) = db.server_processes.clone() { let mut server_processes = server_process.lock().unwrap(); @@ -460,6 +482,23 @@ impl Env { Env::stop_server(server_process); } } + if is_full_restart { + if let Some(mut metasrv_process) = + db.metasrv_process.lock().expect("poisoned lock").take() + { + Env::stop_server(&mut metasrv_process); + } + if let Some(mut frontend_process) = + db.frontend_process.lock().expect("poisoned lock").take() + { + Env::stop_server(&mut frontend_process); + } + if let Some(mut flownode_process) = + db.flownode_process.lock().expect("poisoned lock").take() + { + Env::stop_server(&mut flownode_process); + } + } } // check if the server is distributed or standalone @@ -468,12 +507,37 @@ impl Env { vec![new_server_process] } else { db.ctx.reset_datanode_id(); + if is_full_restart { + let metasrv = self.start_server("metasrv", &db.ctx, false).await; + db.metasrv_process + .lock() + .expect("lock poisoned") + .replace(metasrv); + + // wait for metasrv to start + // since it seems older version of db might take longer to complete election + tokio::time::sleep(Duration::from_secs(5)).await; + } let mut processes = vec![]; for _ in 0..3 { let new_server_process = self.start_server("datanode", &db.ctx, false).await; processes.push(new_server_process); } + + if is_full_restart { + let frontend = self.start_server("frontend", &db.ctx, false).await; + db.frontend_process + .lock() + .expect("lock poisoned") + .replace(frontend); + + let flownode = self.start_server("flownode", &db.ctx, false).await; + db.flownode_process + .lock() + .expect("lock poisoned") + .replace(flownode); + } processes }; @@ -493,6 +557,19 @@ impl Env { } } + /// Setup etcd if needed. 
+ fn setup_etcd(&self) { + if self.store_config.setup_etcd { + let client_ports = self + .store_config + .store_addrs + .iter() + .map(|s| s.split(':').nth(1).unwrap().parse::().unwrap()) + .collect::>(); + util::setup_etcd(client_ports, None, None); + } + } + /// Generate config file to `/tmp/{subcommand}-{current_time}.toml` fn generate_config_file(&self, subcommand: &str, db_ctx: &GreptimeDBContext) -> String { let mut tt = TinyTemplate::new(); @@ -509,6 +586,8 @@ impl Env { procedure_dir: String, is_raft_engine: bool, kafka_wal_broker_endpoints: String, + use_etcd: bool, + store_addrs: String, } let data_home = self.sqlness_home.join(format!("greptimedb-{subcommand}")); @@ -522,6 +601,15 @@ impl Env { procedure_dir, is_raft_engine: db_ctx.is_raft_engine(), kafka_wal_broker_endpoints: db_ctx.kafka_wal_broker_endpoints(), + use_etcd: !self.store_config.store_addrs.is_empty(), + store_addrs: self + .store_config + .store_addrs + .clone() + .iter() + .map(|p| format!("\"{p}\"")) + .collect::>() + .join(","), }; let rendered = tt.render(subcommand, &ctx).unwrap(); @@ -580,9 +668,9 @@ impl Env { pub struct GreptimeDB { server_processes: Option>>>, - metasrv_process: Option, - frontend_process: Option, - flownode_process: Option, + metasrv_process: Mutex>, + frontend_process: Mutex>, + flownode_process: Mutex>, grpc_client: TokioMutex, pg_client: TokioMutex, mysql_client: TokioMutex, @@ -693,8 +781,35 @@ impl GreptimeDB { impl Database for GreptimeDB { async fn query(&self, ctx: QueryContext, query: String) -> Box { if ctx.context.contains_key("restart") && self.env.server_addrs.server_addr.is_none() { - self.env.restart_server(self).await; + self.env.restart_server(self, false).await; + } else if let Some(version) = ctx.context.get("version") { + let version_bin_dir = self + .env + .versioned_bins_dirs + .lock() + .expect("lock poison") + .get(version.as_str()) + .cloned(); + + match version_bin_dir { + Some(path) if path.clone().join(PROGRAM).is_file() => { + // use version in versioned_bins_dirs + *self.env.bins_dir.lock().unwrap() = Some(path.clone()); + } + _ => { + // use version in dir files + maybe_pull_binary(version, self.env.pull_version_on_need).await; + let root = get_workspace_root(); + let new_path = PathBuf::from_iter([&root, version]); + *self.env.bins_dir.lock().unwrap() = Some(new_path); + } + } + + self.env.restart_server(self, true).await; + // sleep for a while to wait for the server to fully boot up + tokio::time::sleep(Duration::from_secs(5)).await; } + if let Some(protocol) = ctx.context.get(PROTOCOL_KEY) { // protocol is bound to be either "mysql" or "postgres" if protocol == MYSQL { @@ -720,15 +835,30 @@ impl GreptimeDB { ); } } - if let Some(mut metasrv) = self.metasrv_process.take() { + if let Some(mut metasrv) = self + .metasrv_process + .lock() + .expect("someone else panic when holding lock") + .take() + { Env::stop_server(&mut metasrv); println!("Metasrv (pid = {}) is stopped", metasrv.id()); } - if let Some(mut frontend) = self.frontend_process.take() { + if let Some(mut frontend) = self + .frontend_process + .lock() + .expect("someone else panic when holding lock") + .take() + { Env::stop_server(&mut frontend); println!("Frontend (pid = {}) is stopped", frontend.id()); } - if let Some(mut flownode) = self.flownode_process.take() { + if let Some(mut flownode) = self + .flownode_process + .lock() + .expect("someone else panic when holding lock") + .take() + { Env::stop_server(&mut flownode); println!("Flownode (pid = {}) is stopped", flownode.id()); } @@ 
-752,14 +882,16 @@ struct GreptimeDBContext { time: i64, datanode_id: AtomicU32, wal: WalConfig, + store_config: StoreConfig, } impl GreptimeDBContext { - pub fn new(wal: WalConfig) -> Self { + pub fn new(wal: WalConfig, store_config: StoreConfig) -> Self { Self { time: common_time::util::current_time_millis(), datanode_id: AtomicU32::new(0), wal, + store_config, } } @@ -787,6 +919,10 @@ impl GreptimeDBContext { fn reset_datanode_id(&self) { self.datanode_id.store(0, Ordering::Relaxed); } + + fn store_config(&self) -> StoreConfig { + self.store_config.clone() + } } struct ResultDisplayer { diff --git a/tests/runner/src/main.rs b/tests/runner/src/main.rs index eca72f280e2a..2e3158e1953b 100644 --- a/tests/runner/src/main.rs +++ b/tests/runner/src/main.rs @@ -22,6 +22,8 @@ use env::{Env, WalConfig}; use sqlness::interceptor::Registry; use sqlness::{ConfigBuilder, Runner}; +use crate::env::StoreConfig; + mod env; mod protocol_interceptor; mod util; @@ -92,6 +94,18 @@ struct Args { /// This may affect future test runs. #[clap(long)] preserve_state: bool, + + /// Pull Different versions of GreptimeDB on need. + #[clap(long, default_value = "true")] + pull_version_on_need: bool, + + /// The store addresses for metadata, if empty, will use memory store. + #[clap(long)] + store_addrs: Vec, + + /// Whether to setup etcd, by default it is false. + #[clap(long, default_value = "false")] + setup_etcd: bool, } #[tokio::main] @@ -110,6 +124,11 @@ async fn main() { Arc::new(protocol_interceptor::ProtocolInterceptorFactory), ); + if let Some(d) = &args.case_dir { + if !d.is_dir() { + panic!("{} is not a directory", d.display()); + } + } let config = ConfigBuilder::default() .case_dir(util::get_case_dir(args.case_dir)) .fail_fast(args.fail_fast) @@ -132,19 +151,30 @@ async fn main() { }, }; + let store = StoreConfig { + store_addrs: args.store_addrs.clone(), + setup_etcd: args.setup_etcd, + }; + let runner = Runner::new( config, Env::new( sqlness_home.clone(), args.server_addr.clone(), wal, + args.pull_version_on_need, args.bins_dir, + store, ), ); runner.run().await.unwrap(); // clean up and exit if !args.preserve_state { + if args.setup_etcd { + println!("Stopping etcd"); + util::stop_rm_etcd(); + } println!("Removing state in {:?}", sqlness_home); tokio::fs::remove_dir_all(sqlness_home).await.unwrap(); } diff --git a/tests/runner/src/util.rs b/tests/runner/src/util.rs index 04c336e1485c..4bcd482a26bf 100644 --- a/tests/runner/src/util.rs +++ b/tests/runner/src/util.rs @@ -12,18 +12,299 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::io::Read; use std::net::SocketAddr; -use std::path::PathBuf; +use std::path::{Path, PathBuf}; use std::process::Command; use std::time::Duration; +use sha2::{Digest, Sha256}; use tokio::io::AsyncWriteExt; use tokio::net::TcpSocket; use tokio::time; +use tokio_stream::StreamExt; /// Check port every 0.1 second. 
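// The new `--store-addrs` and `--setup-etcd` flags are funneled into a small
// store configuration that main.rs passes to `Env::new`. Its definition lives in
// env.rs outside this hunk, so the shape below is an assumption inferred from how
// the struct is constructed and used in this patch.

/// Assumed shape of `StoreConfig` (actual definition not shown in this hunk).
#[derive(Clone, Default)]
pub struct StoreConfig {
    /// Metadata store addresses, e.g. "127.0.0.1:2379"; when empty, metasrv is
    /// started with `--backend memory-store` instead.
    pub store_addrs: Vec<String>,
    /// Whether the runner should start (and later tear down) an etcd container.
    pub setup_etcd: bool,
}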
const PORT_CHECK_INTERVAL: Duration = Duration::from_millis(100); +#[cfg(not(windows))] +pub const PROGRAM: &str = "./greptime"; +#[cfg(windows)] +pub const PROGRAM: &str = "greptime.exe"; + +fn http_proxy() -> Option { + for proxy in ["http_proxy", "HTTP_PROXY", "all_proxy", "ALL_PROXY"] { + if let Ok(proxy_addr) = std::env::var(proxy) { + println!("Getting Proxy from env var: {}={}", proxy, proxy_addr); + return Some(proxy_addr); + } + } + None +} + +fn https_proxy() -> Option { + for proxy in ["https_proxy", "HTTPS_PROXY", "all_proxy", "ALL_PROXY"] { + if let Ok(proxy_addr) = std::env::var(proxy) { + println!("Getting Proxy from env var: {}={}", proxy, proxy_addr); + return Some(proxy_addr); + } + } + None +} + +async fn download_files(url: &str, path: &str) { + let proxy = if url.starts_with("http://") { + http_proxy().map(|proxy| reqwest::Proxy::http(proxy).unwrap()) + } else if url.starts_with("https://") { + https_proxy().map(|proxy| reqwest::Proxy::https(proxy).unwrap()) + } else { + None + }; + + let client = proxy + .map(|proxy| { + reqwest::Client::builder() + .proxy(proxy) + .build() + .expect("Failed to build client") + }) + .unwrap_or(reqwest::Client::new()); + + let mut file = tokio::fs::File::create(path) + .await + .unwrap_or_else(|_| panic!("Failed to create file in {path}")); + println!("Downloading {}...", url); + + let resp = client + .get(url) + .send() + .await + .expect("Failed to send download request"); + let len = resp.content_length(); + let mut stream = resp.bytes_stream(); + let mut size_downloaded = 0; + + while let Some(chunk_result) = stream.next().await { + let chunk = chunk_result.unwrap(); + size_downloaded += chunk.len(); + if let Some(len) = len { + print!("\rDownloading {}/{} bytes", size_downloaded, len); + } else { + print!("\rDownloaded {} bytes", size_downloaded); + } + + file.write_all(&chunk).await.unwrap(); + } + + file.flush().await.unwrap(); + + println!("\nDownloaded {}", url); +} + +fn decompress(archive: &str, dest: &str) { + let tar = std::fs::File::open(archive).unwrap(); + let dec = flate2::read::GzDecoder::new(tar); + let mut a = tar::Archive::new(dec); + a.unpack(dest).unwrap(); +} + +/// Use curl to download the binary from the release page. +/// +/// # Arguments +/// +/// * `version` - The version of the binary to download. i.e. 
"v0.9.5" +pub async fn pull_binary(version: &str) { + let os = std::env::consts::OS; + let arch = match std::env::consts::ARCH { + "x86_64" => "amd64", + "aarch64" => "arm64", + _ => panic!("Unsupported arch: {}", std::env::consts::ARCH), + }; + let triple = format!("greptime-{}-{}-{}", os, arch, version); + let filename = format!("{triple}.tar.gz"); + + let url = format!( + "https://github.com/GreptimeTeam/greptimedb/releases/download/{version}/{filename}" + ); + println!("Downloading {version} binary from {}", url); + + // mkdir {version} + let _ = std::fs::create_dir(version); + + let archive = Path::new(version).join(filename); + let folder_path = Path::new(version); + + // download the binary to the version directory + download_files(&url, &archive.to_string_lossy()).await; + + let checksum_file = format!("{triple}.sha256sum"); + let checksum_url = format!( + "https://github.com/GreptimeTeam/greptimedb/releases/download/{version}/{checksum_file}" + ); + download_files( + &checksum_url, + &PathBuf::from_iter([version, &checksum_file]).to_string_lossy(), + ) + .await; + + // verify the checksum + let mut file = std::fs::File::open(&archive).unwrap(); + let mut sha256 = Sha256::new(); + std::io::copy(&mut file, &mut sha256).unwrap(); + let checksum: Vec = sha256.finalize().to_vec(); + + let mut expected_checksum = + std::fs::File::open(PathBuf::from_iter([version, &checksum_file])).unwrap(); + let mut buf = String::new(); + expected_checksum.read_to_string(&mut buf).unwrap(); + let expected_checksum = hex::decode(buf.lines().next().unwrap()).unwrap(); + + assert_eq!( + checksum, expected_checksum, + "Checksum mismatched, downloaded file is corrupted" + ); + + decompress(&archive.to_string_lossy(), &folder_path.to_string_lossy()); + println!("Downloaded and extracted {version} binary to {folder_path:?}"); + + // move the binary to the version directory + std::fs::rename( + PathBuf::from_iter([version, &triple, "greptime"]), + PathBuf::from_iter([version, "greptime"]), + ) + .unwrap(); + + // remove the archive and inner folder + std::fs::remove_file(&archive).unwrap(); + std::fs::remove_dir(PathBuf::from_iter([version, &triple])).unwrap(); +} + +/// Pull the binary if it does not exist and `pull_version_on_need` is true. +pub async fn maybe_pull_binary(version: &str, pull_version_on_need: bool) { + let exist = Path::new(version).join(PROGRAM).is_file(); + match (exist, pull_version_on_need){ + (true, _) => println!("Binary {version} exists"), + (false, false) => panic!("Binary {version} does not exist, please run with --pull-version-on-need or manually download it"), + (false, true) => { pull_binary(version).await; }, + } +} + +/// Set up a standalone etcd in docker. 
+pub fn setup_etcd(client_ports: Vec, peer_port: Option, etcd_version: Option<&str>) { + if std::process::Command::new("docker") + .args(["-v"]) + .status() + .is_err() + { + panic!("Docker is not installed"); + } + let peer_port = peer_port.unwrap_or(2380); + let exposed_port: Vec<_> = client_ports.iter().chain(Some(&peer_port)).collect(); + let exposed_port_str = exposed_port + .iter() + .flat_map(|p| ["-p".to_string(), format!("{p}:{p}")]) + .collect::>(); + let etcd_version = etcd_version.unwrap_or("v3.5.17"); + let etcd_image = format!("quay.io/coreos/etcd:{etcd_version}"); + let peer_url = format!("http://0.0.0.0:{peer_port}"); + let my_local_ip = local_ip_address::local_ip().unwrap(); + + let my_local_ip_str = my_local_ip.to_string(); + + let mut arg_list = vec![]; + arg_list.extend([ + "run", + "-d", + "-v", + "/usr/share/ca-certificates/:/etc/ssl/certs", + ]); + arg_list.extend(exposed_port_str.iter().map(std::ops::Deref::deref)); + arg_list.extend([ + "--name", + "etcd", + &etcd_image, + "etcd", + "-name", + "etcd0", + "-advertise-client-urls", + ]); + + let adv_client_urls = client_ports + .iter() + .map(|p| format!("http://{my_local_ip_str}:{p}")) + .collect::>() + .join(","); + + arg_list.push(&adv_client_urls); + + arg_list.extend(["-listen-client-urls"]); + + let client_ports_fmt = client_ports + .iter() + .map(|p| format!("http://0.0.0.0:{p}")) + .collect::>() + .join(","); + + arg_list.push(&client_ports_fmt); + + arg_list.push("-initial-advertise-peer-urls"); + let advertise_peer_url = format!("http://{my_local_ip_str}:{peer_port}"); + arg_list.push(&advertise_peer_url); + + arg_list.extend(["-listen-peer-urls", &peer_url]); + + arg_list.extend(["-initial-cluster-token", "etcd-cluster-1"]); + + arg_list.push("-initial-cluster"); + + let init_cluster_url = format!("etcd0=http://{my_local_ip_str}:{peer_port}"); + + arg_list.push(&init_cluster_url); + + arg_list.extend(["-initial-cluster-state", "new"]); + + let mut cmd = std::process::Command::new("docker"); + + cmd.args(arg_list); + + println!("Starting etcd with command: {:?}", cmd); + + let status = cmd.status(); + if status.is_err() { + panic!("Failed to start etcd: {:?}", status); + } else if let Ok(status) = status { + if status.success() { + println!( + "Started etcd with client ports {:?} and peer port {}, statues:{status:?}", + client_ports, peer_port + ); + } else { + panic!("Failed to start etcd: {:?}", status); + } + } +} + +/// Stop and remove the etcd container +pub fn stop_rm_etcd() { + let status = std::process::Command::new("docker") + .args(["container", "stop", "etcd"]) + .status(); + if status.is_err() { + panic!("Failed to stop etcd: {:?}", status); + } else { + println!("Stopped etcd"); + } + // rm the container + let status = std::process::Command::new("docker") + .args(["container", "rm", "etcd"]) + .status(); + if status.is_err() { + panic!("Failed to remove etcd container: {:?}", status); + } else { + println!("Removed etcd container"); + } +} + /// Get the dir of test cases. This function only works when the runner is run /// under the project's dir because it depends on some envs set by cargo. 
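// A minimal usage pairing of the two docker helpers above, assuming a single
// client port of 2379 (the peer port and etcd version fall back to 2380 and
// v3.5.17 respectively):
//
//     util::setup_etcd(vec![2379], None, None);
//     // ... run sqlness with `--store-addrs 127.0.0.1:2379 --setup-etcd true` ...
//     util::stop_rm_etcd();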
pub fn get_case_dir(case_dir: Option) -> String { diff --git a/tests/upgrade-compat/distributed/common b/tests/upgrade-compat/distributed/common new file mode 120000 index 000000000000..2b0920287dc9 --- /dev/null +++ b/tests/upgrade-compat/distributed/common @@ -0,0 +1 @@ +../standalone/common \ No newline at end of file diff --git a/tests/upgrade-compat/standalone/common/table_engine_0_10_2.result b/tests/upgrade-compat/standalone/common/table_engine_0_10_2.result new file mode 100644 index 000000000000..046255a641f3 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/table_engine_0_10_2.result @@ -0,0 +1,137 @@ +-- SQLNESS ARG version=v0.10.2 +CREATE TABLE mito_system_metrics ( + host STRING, + idc STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +)ENGINE=mito; + +Affected Rows: 0 + +INSERT INTO mito_system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +Affected Rows: 3 + +CREATE TABLE phy (ts timestamp time index, cpu_util double) engine=metric with ("physical_metric_table" = ""); + +Affected Rows: 0 + +CREATE TABLE system_metrics ( + host STRING, + cpu_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host), + TIME INDEX(ts) +)ENGINE=metric with ("on_physical_table" = "phy"); + +Affected Rows: 0 + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host1', 11.8, 1667446797450), + ('host2', 80.0, 1667446797450), + ('host1', 50.0, 1667446797450); + +Affected Rows: 3 + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE mito_system_metrics; + ++---------------------+-----------------------------------------------------------+ +| Table | Create Table | ++---------------------+-----------------------------------------------------------+ +| mito_system_metrics | CREATE TABLE IF NOT EXISTS "mito_system_metrics" ( | +| | "host" STRING NULL, | +| | "idc" STRING NULL, | +| | "cpu_util" DOUBLE NULL, | +| | "memory_util" DOUBLE NULL, | +| | "disk_util" DOUBLE NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host", "idc") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++---------------------+-----------------------------------------------------------+ + +SHOW CREATE TABLE system_metrics; + ++----------------+-----------------------------------------------------------+ +| Table | Create Table | ++----------------+-----------------------------------------------------------+ +| system_metrics | CREATE TABLE IF NOT EXISTS "system_metrics" ( | +| | "cpu_util" DOUBLE NULL, | +| | "host" STRING NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host") | +| | ) | +| | | +| | ENGINE=metric | +| | WITH( | +| | on_physical_table = 'phy' | +| | ) | ++----------------+-----------------------------------------------------------+ + +INSERT INTO mito_system_metrics +VALUES + ("host3", "idc_a", 90.0, 70.3, 90.0, 1667446797450), + ("host4", "idc_a", 70.0, 70.3, 90.0, 1667446797450), + ("host5", "idc_a", 60.0, 70.3, 90.0, 1667446797450); + +Affected Rows: 3 + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host3', 90.0, 1667446797450), + ('host4', 70.0, 1667446797450), + ('host5', 60.0, 1667446797450); + +Affected Rows: 3 + +SELECT * FROM mito_system_metrics; + 
++-------+-------+----------+-------------+-----------+-------------------------+ +| host | idc | cpu_util | memory_util | disk_util | ts | ++-------+-------+----------+-------------+-----------+-------------------------+ +| host1 | idc_a | 11.8 | 10.3 | 10.3 | 2022-11-03T03:39:57.450 | +| host1 | idc_b | 50.0 | 66.7 | 40.6 | 2022-11-03T03:39:57.450 | +| host2 | idc_a | 80.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host3 | idc_a | 90.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host4 | idc_a | 70.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host5 | idc_a | 60.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | ++-------+-------+----------+-------------+-----------+-------------------------+ + +SELECT * FROM system_metrics; + ++----------+-------+-------------------------+ +| cpu_util | host | ts | ++----------+-------+-------------------------+ +| 80.0 | host2 | 2022-11-03T03:39:57.450 | +| 70.0 | host4 | 2022-11-03T03:39:57.450 | +| 60.0 | host5 | 2022-11-03T03:39:57.450 | +| 90.0 | host3 | 2022-11-03T03:39:57.450 | +| 50.0 | host1 | 2022-11-03T03:39:57.450 | ++----------+-------+-------------------------+ + +DROP TABLE mito_system_metrics; + +Affected Rows: 0 + +DROP TABLE system_metrics; + +Affected Rows: 0 + +DROP TABLE phy; + +Affected Rows: 0 + diff --git a/tests/upgrade-compat/standalone/common/table_engine_0_10_2.sql b/tests/upgrade-compat/standalone/common/table_engine_0_10_2.sql new file mode 100644 index 000000000000..1907533b1592 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/table_engine_0_10_2.sql @@ -0,0 +1,60 @@ +-- SQLNESS ARG version=v0.10.2 +CREATE TABLE mito_system_metrics ( + host STRING, + idc STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +)ENGINE=mito; + +INSERT INTO mito_system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +CREATE TABLE phy (ts timestamp time index, cpu_util double) engine=metric with ("physical_metric_table" = ""); + +CREATE TABLE system_metrics ( + host STRING, + cpu_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host), + TIME INDEX(ts) +)ENGINE=metric with ("on_physical_table" = "phy"); + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host1', 11.8, 1667446797450), + ('host2', 80.0, 1667446797450), + ('host1', 50.0, 1667446797450); + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE mito_system_metrics; + +SHOW CREATE TABLE system_metrics; + +INSERT INTO mito_system_metrics +VALUES + ("host3", "idc_a", 90.0, 70.3, 90.0, 1667446797450), + ("host4", "idc_a", 70.0, 70.3, 90.0, 1667446797450), + ("host5", "idc_a", 60.0, 70.3, 90.0, 1667446797450); + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host3', 90.0, 1667446797450), + ('host4', 70.0, 1667446797450), + ('host5', 60.0, 1667446797450); + +SELECT * FROM mito_system_metrics; + +SELECT * FROM system_metrics; + +DROP TABLE mito_system_metrics; + +DROP TABLE system_metrics; + +DROP TABLE phy; diff --git a/tests/upgrade-compat/standalone/common/table_engine_v0_11_0.result b/tests/upgrade-compat/standalone/common/table_engine_v0_11_0.result new file mode 100644 index 000000000000..7ce230a6881d --- /dev/null +++ b/tests/upgrade-compat/standalone/common/table_engine_v0_11_0.result @@ -0,0 +1,137 @@ +-- SQLNESS ARG version=v0.11.0 +CREATE TABLE mito_system_metrics ( + host STRING, + idc STRING, + 
cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +)ENGINE=mito; + +Affected Rows: 0 + +INSERT INTO mito_system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +Affected Rows: 3 + +CREATE TABLE phy (ts timestamp time index, cpu_util double) engine=metric with ("physical_metric_table" = ""); + +Affected Rows: 0 + +CREATE TABLE system_metrics ( + host STRING, + cpu_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host), + TIME INDEX(ts) +)ENGINE=metric with ("on_physical_table" = "phy"); + +Affected Rows: 0 + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host1', 11.8, 1667446797450), + ('host2', 80.0, 1667446797450), + ('host1', 50.0, 1667446797450); + +Affected Rows: 3 + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE mito_system_metrics; + ++---------------------+-----------------------------------------------------------+ +| Table | Create Table | ++---------------------+-----------------------------------------------------------+ +| mito_system_metrics | CREATE TABLE IF NOT EXISTS "mito_system_metrics" ( | +| | "host" STRING NULL, | +| | "idc" STRING NULL, | +| | "cpu_util" DOUBLE NULL, | +| | "memory_util" DOUBLE NULL, | +| | "disk_util" DOUBLE NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host", "idc") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++---------------------+-----------------------------------------------------------+ + +SHOW CREATE TABLE system_metrics; + ++----------------+-----------------------------------------------------------+ +| Table | Create Table | ++----------------+-----------------------------------------------------------+ +| system_metrics | CREATE TABLE IF NOT EXISTS "system_metrics" ( | +| | "cpu_util" DOUBLE NULL, | +| | "host" STRING NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host") | +| | ) | +| | | +| | ENGINE=metric | +| | WITH( | +| | on_physical_table = 'phy' | +| | ) | ++----------------+-----------------------------------------------------------+ + +INSERT INTO mito_system_metrics +VALUES + ("host3", "idc_a", 90.0, 70.3, 90.0, 1667446797450), + ("host4", "idc_a", 70.0, 70.3, 90.0, 1667446797450), + ("host5", "idc_a", 60.0, 70.3, 90.0, 1667446797450); + +Affected Rows: 3 + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host3', 90.0, 1667446797450), + ('host4', 70.0, 1667446797450), + ('host5', 60.0, 1667446797450); + +Affected Rows: 3 + +SELECT * FROM mito_system_metrics; + ++-------+-------+----------+-------------+-----------+-------------------------+ +| host | idc | cpu_util | memory_util | disk_util | ts | ++-------+-------+----------+-------------+-----------+-------------------------+ +| host1 | idc_a | 11.8 | 10.3 | 10.3 | 2022-11-03T03:39:57.450 | +| host1 | idc_b | 50.0 | 66.7 | 40.6 | 2022-11-03T03:39:57.450 | +| host2 | idc_a | 80.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host3 | idc_a | 90.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host4 | idc_a | 70.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host5 | idc_a | 60.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | ++-------+-------+----------+-------------+-----------+-------------------------+ + +SELECT * FROM system_metrics; + 
++----------+-------+-------------------------+ +| cpu_util | host | ts | ++----------+-------+-------------------------+ +| 80.0 | host2 | 2022-11-03T03:39:57.450 | +| 70.0 | host4 | 2022-11-03T03:39:57.450 | +| 60.0 | host5 | 2022-11-03T03:39:57.450 | +| 90.0 | host3 | 2022-11-03T03:39:57.450 | +| 50.0 | host1 | 2022-11-03T03:39:57.450 | ++----------+-------+-------------------------+ + +DROP TABLE mito_system_metrics; + +Affected Rows: 0 + +DROP TABLE system_metrics; + +Affected Rows: 0 + +DROP TABLE phy; + +Affected Rows: 0 + diff --git a/tests/upgrade-compat/standalone/common/table_engine_v0_11_0.sql b/tests/upgrade-compat/standalone/common/table_engine_v0_11_0.sql new file mode 100644 index 000000000000..963170fdf583 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/table_engine_v0_11_0.sql @@ -0,0 +1,60 @@ +-- SQLNESS ARG version=v0.11.0 +CREATE TABLE mito_system_metrics ( + host STRING, + idc STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +)ENGINE=mito; + +INSERT INTO mito_system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +CREATE TABLE phy (ts timestamp time index, cpu_util double) engine=metric with ("physical_metric_table" = ""); + +CREATE TABLE system_metrics ( + host STRING, + cpu_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host), + TIME INDEX(ts) +)ENGINE=metric with ("on_physical_table" = "phy"); + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host1', 11.8, 1667446797450), + ('host2', 80.0, 1667446797450), + ('host1', 50.0, 1667446797450); + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE mito_system_metrics; + +SHOW CREATE TABLE system_metrics; + +INSERT INTO mito_system_metrics +VALUES + ("host3", "idc_a", 90.0, 70.3, 90.0, 1667446797450), + ("host4", "idc_a", 70.0, 70.3, 90.0, 1667446797450), + ("host5", "idc_a", 60.0, 70.3, 90.0, 1667446797450); + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host3', 90.0, 1667446797450), + ('host4', 70.0, 1667446797450), + ('host5', 60.0, 1667446797450); + +SELECT * FROM mito_system_metrics; + +SELECT * FROM system_metrics; + +DROP TABLE mito_system_metrics; + +DROP TABLE system_metrics; + +DROP TABLE phy; diff --git a/tests/upgrade-compat/standalone/common/table_engine_v0_9_5.result b/tests/upgrade-compat/standalone/common/table_engine_v0_9_5.result new file mode 100644 index 000000000000..41b81f01c082 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/table_engine_v0_9_5.result @@ -0,0 +1,137 @@ +-- SQLNESS ARG version=v0.9.5 +CREATE TABLE mito_system_metrics ( + host STRING, + idc STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +)ENGINE=mito; + +Affected Rows: 0 + +INSERT INTO mito_system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +Affected Rows: 3 + +CREATE TABLE phy (ts timestamp time index, cpu_util double) engine=metric with ("physical_metric_table" = ""); + +Affected Rows: 0 + +CREATE TABLE system_metrics ( + host STRING, + cpu_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host), + TIME INDEX(ts) +)ENGINE=metric with ("on_physical_table" = 
"phy"); + +Affected Rows: 0 + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host1', 11.8, 1667446797450), + ('host2', 80.0, 1667446797450), + ('host1', 50.0, 1667446797450); + +Affected Rows: 3 + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE mito_system_metrics; + ++---------------------+-----------------------------------------------------------+ +| Table | Create Table | ++---------------------+-----------------------------------------------------------+ +| mito_system_metrics | CREATE TABLE IF NOT EXISTS "mito_system_metrics" ( | +| | "host" STRING NULL, | +| | "idc" STRING NULL, | +| | "cpu_util" DOUBLE NULL, | +| | "memory_util" DOUBLE NULL, | +| | "disk_util" DOUBLE NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host", "idc") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++---------------------+-----------------------------------------------------------+ + +SHOW CREATE TABLE system_metrics; + ++----------------+-----------------------------------------------------------+ +| Table | Create Table | ++----------------+-----------------------------------------------------------+ +| system_metrics | CREATE TABLE IF NOT EXISTS "system_metrics" ( | +| | "cpu_util" DOUBLE NULL, | +| | "host" STRING NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host") | +| | ) | +| | | +| | ENGINE=metric | +| | WITH( | +| | on_physical_table = 'phy' | +| | ) | ++----------------+-----------------------------------------------------------+ + +INSERT INTO mito_system_metrics +VALUES + ("host3", "idc_a", 90.0, 70.3, 90.0, 1667446797450), + ("host4", "idc_a", 70.0, 70.3, 90.0, 1667446797450), + ("host5", "idc_a", 60.0, 70.3, 90.0, 1667446797450); + +Affected Rows: 3 + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host3', 90.0, 1667446797450), + ('host4', 70.0, 1667446797450), + ('host5', 60.0, 1667446797450); + +Affected Rows: 3 + +SELECT * FROM mito_system_metrics; + ++-------+-------+----------+-------------+-----------+-------------------------+ +| host | idc | cpu_util | memory_util | disk_util | ts | ++-------+-------+----------+-------------+-----------+-------------------------+ +| host1 | idc_a | 11.8 | 10.3 | 10.3 | 2022-11-03T03:39:57.450 | +| host1 | idc_b | 50.0 | 66.7 | 40.6 | 2022-11-03T03:39:57.450 | +| host2 | idc_a | 80.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host3 | idc_a | 90.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host4 | idc_a | 70.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | +| host5 | idc_a | 60.0 | 70.3 | 90.0 | 2022-11-03T03:39:57.450 | ++-------+-------+----------+-------------+-----------+-------------------------+ + +SELECT * FROM system_metrics; + ++----------+-------+-------------------------+ +| cpu_util | host | ts | ++----------+-------+-------------------------+ +| 80.0 | host2 | 2022-11-03T03:39:57.450 | +| 70.0 | host4 | 2022-11-03T03:39:57.450 | +| 60.0 | host5 | 2022-11-03T03:39:57.450 | +| 90.0 | host3 | 2022-11-03T03:39:57.450 | +| 50.0 | host1 | 2022-11-03T03:39:57.450 | ++----------+-------+-------------------------+ + +DROP TABLE mito_system_metrics; + +Affected Rows: 0 + +DROP TABLE system_metrics; + +Affected Rows: 0 + +DROP TABLE phy; + +Affected Rows: 0 + diff --git a/tests/upgrade-compat/standalone/common/table_engine_v0_9_5.sql b/tests/upgrade-compat/standalone/common/table_engine_v0_9_5.sql new file mode 100644 index 000000000000..9908085213ed --- /dev/null +++ 
b/tests/upgrade-compat/standalone/common/table_engine_v0_9_5.sql @@ -0,0 +1,60 @@ +-- SQLNESS ARG version=v0.9.5 +CREATE TABLE mito_system_metrics ( + host STRING, + idc STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +)ENGINE=mito; + +INSERT INTO mito_system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +CREATE TABLE phy (ts timestamp time index, cpu_util double) engine=metric with ("physical_metric_table" = ""); + +CREATE TABLE system_metrics ( + host STRING, + cpu_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host), + TIME INDEX(ts) +)ENGINE=metric with ("on_physical_table" = "phy"); + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host1', 11.8, 1667446797450), + ('host2', 80.0, 1667446797450), + ('host1', 50.0, 1667446797450); + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE mito_system_metrics; + +SHOW CREATE TABLE system_metrics; + +INSERT INTO mito_system_metrics +VALUES + ("host3", "idc_a", 90.0, 70.3, 90.0, 1667446797450), + ("host4", "idc_a", 70.0, 70.3, 90.0, 1667446797450), + ("host5", "idc_a", 60.0, 70.3, 90.0, 1667446797450); + +INSERT INTO system_metrics (host, cpu_util, ts) +VALUES + ('host3', 90.0, 1667446797450), + ('host4', 70.0, 1667446797450), + ('host5', 60.0, 1667446797450); + +SELECT * FROM mito_system_metrics; + +SELECT * FROM system_metrics; + +DROP TABLE mito_system_metrics; + +DROP TABLE system_metrics; + +DROP TABLE phy; diff --git a/tests/upgrade-compat/standalone/common/test_simple.result b/tests/upgrade-compat/standalone/common/test_simple.result new file mode 100644 index 000000000000..ff2c340598a1 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/test_simple.result @@ -0,0 +1,47 @@ +-- SQLNESS ARG version=v0.9.5 +CREATE TABLE system_metrics ( + host STRING, + idc STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +); + +Affected Rows: 0 + +INSERT INTO system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +Affected Rows: 3 + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE system_metrics; + ++----------------+-----------------------------------------------------------+ +| Table | Create Table | ++----------------+-----------------------------------------------------------+ +| system_metrics | CREATE TABLE IF NOT EXISTS "system_metrics" ( | +| | "host" STRING NULL, | +| | "idc" STRING NULL, | +| | "cpu_util" DOUBLE NULL, | +| | "memory_util" DOUBLE NULL, | +| | "disk_util" DOUBLE NULL, | +| | "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), | +| | TIME INDEX ("ts"), | +| | PRIMARY KEY ("host", "idc") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++----------------+-----------------------------------------------------------+ + +DROP TABLE system_metrics; + +Affected Rows: 0 + diff --git a/tests/upgrade-compat/standalone/common/test_simple.sql b/tests/upgrade-compat/standalone/common/test_simple.sql new file mode 100644 index 000000000000..0f8daa0985ab --- /dev/null +++ b/tests/upgrade-compat/standalone/common/test_simple.sql @@ -0,0 +1,22 @@ +-- SQLNESS ARG version=v0.9.5 +CREATE TABLE system_metrics ( + host STRING, + idc 
STRING, + cpu_util DOUBLE, + memory_util DOUBLE, + disk_util DOUBLE, + ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP(), + PRIMARY KEY(host, idc), + TIME INDEX(ts) +); + +INSERT INTO system_metrics +VALUES + ("host1", "idc_a", 11.8, 10.3, 10.3, 1667446797450), + ("host2", "idc_a", 80.0, 70.3, 90.0, 1667446797450), + ("host1", "idc_b", 50.0, 66.7, 40.6, 1667446797450); + +-- SQLNESS ARG version=latest +SHOW CREATE TABLE system_metrics; + +DROP TABLE system_metrics; diff --git a/tests/upgrade-compat/standalone/common/test_ttl.result b/tests/upgrade-compat/standalone/common/test_ttl.result new file mode 100644 index 000000000000..d06bc629b668 --- /dev/null +++ b/tests/upgrade-compat/standalone/common/test_ttl.result @@ -0,0 +1,153 @@ +-- SQLNESS ARG version=v0.9.5 +CREATE TABLE test_ttl_0s(ts TIMESTAMP TIME INDEX, val INT) WITH (ttl = '0 second'); + +Affected Rows: 0 + +CREATE TABLE test_ttl_1s(ts TIMESTAMP TIME INDEX, val INT) WITH (ttl = '1 second'); + +Affected Rows: 0 + +CREATE TABLE test_ttl_none(ts TIMESTAMP TIME INDEX, val INT); + +Affected Rows: 0 + +CREATE DATABASE ttl_db_1s WITH (ttl = '1 second'); + +Affected Rows: 1 + +CREATE DATABASE ttl_db_0s WITH (ttl = '0 second'); + +Affected Rows: 1 + +CREATE DATABASE ttl_db_none; + +Affected Rows: 1 + +-- SQLNESS ARG version=latest +SHOW TABLES; + ++---------------+ +| Tables | ++---------------+ +| numbers | +| test_ttl_0s | +| test_ttl_1s | +| test_ttl_none | ++---------------+ + +SHOW CREATE TABLE test_ttl_1s; + ++-------------+--------------------------------------------+ +| Table | Create Table | ++-------------+--------------------------------------------+ +| test_ttl_1s | CREATE TABLE IF NOT EXISTS "test_ttl_1s" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "val" INT NULL, | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | WITH( | +| | ttl = '1s' | +| | ) | ++-------------+--------------------------------------------+ + +SHOW CREATE TABLE test_ttl_0s; + ++-------------+--------------------------------------------+ +| Table | Create Table | ++-------------+--------------------------------------------+ +| test_ttl_0s | CREATE TABLE IF NOT EXISTS "test_ttl_0s" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "val" INT NULL, | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | WITH( | +| | ttl = '0s' | +| | ) | ++-------------+--------------------------------------------+ + +SHOW CREATE TABLE test_ttl_none; + ++---------------+----------------------------------------------+ +| Table | Create Table | ++---------------+----------------------------------------------+ +| test_ttl_none | CREATE TABLE IF NOT EXISTS "test_ttl_none" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "val" INT NULL, | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++---------------+----------------------------------------------+ + +DROP TABLE test_ttl_1s; + +Affected Rows: 0 + +DROP TABLE test_ttl_0s; + +Affected Rows: 0 + +DROP TABLE test_ttl_none; + +Affected Rows: 0 + +SHOW DATABASES; + ++--------------------+ +| Database | ++--------------------+ +| greptime_private | +| information_schema | +| public | +| ttl_db_0s | +| ttl_db_1s | +| ttl_db_none | ++--------------------+ + +SHOW CREATE DATABASE ttl_db_1s; + ++-----------+-----------------------------------------+ +| Database | Create Database | ++-----------+-----------------------------------------+ +| ttl_db_1s | CREATE DATABASE IF NOT EXISTS ttl_db_1s | +| | WITH( | +| | ttl = '1s' | +| | ) | ++-----------+-----------------------------------------+ + +SHOW CREATE DATABASE 
ttl_db_0s; + ++-----------+-----------------------------------------+ +| Database | Create Database | ++-----------+-----------------------------------------+ +| ttl_db_0s | CREATE DATABASE IF NOT EXISTS ttl_db_0s | +| | WITH( | +| | ttl = '0s' | +| | ) | ++-----------+-----------------------------------------+ + +SHOW CREATE DATABASE ttl_db_none; + ++-------------+-------------------------------------------+ +| Database | Create Database | ++-------------+-------------------------------------------+ +| ttl_db_none | CREATE DATABASE IF NOT EXISTS ttl_db_none | ++-------------+-------------------------------------------+ + +DROP DATABASE ttl_db_1s; + +Affected Rows: 0 + +DROP DATABASE ttl_db_0s; + +Affected Rows: 0 + +DROP DATABASE ttl_db_none; + +Affected Rows: 0 + diff --git a/tests/upgrade-compat/standalone/common/test_ttl.sql b/tests/upgrade-compat/standalone/common/test_ttl.sql new file mode 100644 index 000000000000..3462fd22444f --- /dev/null +++ b/tests/upgrade-compat/standalone/common/test_ttl.sql @@ -0,0 +1,42 @@ + +-- SQLNESS ARG version=v0.9.5 +CREATE TABLE test_ttl_0s(ts TIMESTAMP TIME INDEX, val INT) WITH (ttl = '0 second'); + +CREATE TABLE test_ttl_1s(ts TIMESTAMP TIME INDEX, val INT) WITH (ttl = '1 second'); + +CREATE TABLE test_ttl_none(ts TIMESTAMP TIME INDEX, val INT); + +CREATE DATABASE ttl_db_1s WITH (ttl = '1 second'); + +CREATE DATABASE ttl_db_0s WITH (ttl = '0 second'); + +CREATE DATABASE ttl_db_none; + +-- SQLNESS ARG version=latest +SHOW TABLES; + +SHOW CREATE TABLE test_ttl_1s; + +SHOW CREATE TABLE test_ttl_0s; + +SHOW CREATE TABLE test_ttl_none; + +DROP TABLE test_ttl_1s; + +DROP TABLE test_ttl_0s; + +DROP TABLE test_ttl_none; + +SHOW DATABASES; + +SHOW CREATE DATABASE ttl_db_1s; + +SHOW CREATE DATABASE ttl_db_0s; + +SHOW CREATE DATABASE ttl_db_none; + +DROP DATABASE ttl_db_1s; + +DROP DATABASE ttl_db_0s; + +DROP DATABASE ttl_db_none; From c33cf593983b9ea653709e3ab4778571ef18129b Mon Sep 17 00:00:00 2001 From: "Lei, HUANG" <6406592+v0y4g3r@users.noreply.github.com> Date: Tue, 17 Dec 2024 15:06:07 +0800 Subject: [PATCH 37/59] perf: avoid holding memtable during compaction (#5157) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * perf/avoid-holding-memtable-during-compaction: Refactor Compaction Version Handling • Introduced CompactionVersion struct to encapsulate region version details for compaction, removing dependency on VersionRef. • Updated CompactionRequest and CompactionRegion to use CompactionVersion. • Modified open_compaction_region to construct CompactionVersion without memtables. • Adjusted WindowedCompactionPicker to work with CompactionVersion. • Enhanced flush logic in WriteBufferManager to improve memory usage checks and logging. 
* reformat code * chore: change log level * reformat code --------- Co-authored-by: Yingwen --- src/mito2/src/compaction.rs | 8 ++-- src/mito2/src/compaction/compactor.rs | 66 +++++++++++++++------------ src/mito2/src/compaction/window.rs | 40 ++++++---------- src/mito2/src/flush.rs | 21 +++++---- 4 files changed, 70 insertions(+), 65 deletions(-) diff --git a/src/mito2/src/compaction.rs b/src/mito2/src/compaction.rs index 5236e0d616dc..7fdd32aa2721 100644 --- a/src/mito2/src/compaction.rs +++ b/src/mito2/src/compaction.rs @@ -44,7 +44,7 @@ use tokio::sync::mpsc::{self, Sender}; use crate::access_layer::AccessLayerRef; use crate::cache::CacheManagerRef; -use crate::compaction::compactor::{CompactionRegion, DefaultCompactor}; +use crate::compaction::compactor::{CompactionRegion, CompactionVersion, DefaultCompactor}; use crate::compaction::picker::{new_picker, CompactionTask}; use crate::compaction::task::CompactionTaskImpl; use crate::config::MitoConfig; @@ -59,7 +59,7 @@ use crate::read::scan_region::ScanInput; use crate::read::seq_scan::SeqScan; use crate::read::BoxedBatchReader; use crate::region::options::MergeMode; -use crate::region::version::{VersionControlRef, VersionRef}; +use crate::region::version::VersionControlRef; use crate::region::ManifestContextRef; use crate::request::{OptionOutputTx, OutputTx, WorkerRequest}; use crate::schedule::remote_job_scheduler::{ @@ -73,7 +73,7 @@ use crate::worker::WorkerListener; /// Region compaction request. pub struct CompactionRequest { pub(crate) engine_config: Arc, - pub(crate) current_version: VersionRef, + pub(crate) current_version: CompactionVersion, pub(crate) access_layer: AccessLayerRef, /// Sender to send notification to the region worker. pub(crate) request_sender: mpsc::Sender, @@ -522,7 +522,7 @@ impl CompactionStatus { listener: WorkerListener, schema_metadata_manager: SchemaMetadataManagerRef, ) -> CompactionRequest { - let current_version = self.version_control.current().version; + let current_version = CompactionVersion::from(self.version_control.current().version); let start_time = Instant::now(); let mut req = CompactionRequest { engine_config, diff --git a/src/mito2/src/compaction/compactor.rs b/src/mito2/src/compaction/compactor.rs index 91ab34c961cf..e2499140fd61 100644 --- a/src/mito2/src/compaction/compactor.rs +++ b/src/mito2/src/compaction/compactor.rs @@ -35,12 +35,10 @@ use crate::error::{EmptyRegionDirSnafu, JoinSnafu, ObjectStoreNotFoundSnafu, Res use crate::manifest::action::{RegionEdit, RegionMetaAction, RegionMetaActionList}; use crate::manifest::manager::{RegionManifestManager, RegionManifestOptions}; use crate::manifest::storage::manifest_compress_type; -use crate::memtable::time_partition::TimePartitions; -use crate::memtable::MemtableBuilderProvider; use crate::read::Source; use crate::region::opener::new_manifest_dir; use crate::region::options::RegionOptions; -use crate::region::version::{VersionBuilder, VersionRef}; +use crate::region::version::VersionRef; use crate::region::{ManifestContext, RegionLeaderState, RegionRoleState}; use crate::schedule::scheduler::LocalScheduler; use crate::sst::file::{FileMeta, IndexType}; @@ -48,6 +46,34 @@ use crate::sst::file_purger::LocalFilePurger; use crate::sst::index::intermediate::IntermediateManager; use crate::sst::index::puffin_manager::PuffinManagerFactory; use crate::sst::parquet::WriteOptions; +use crate::sst::version::{SstVersion, SstVersionRef}; + +/// Region version for compaction that does not hold memtables. 
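// In effect this is the existing `Version` with the memtable and WAL-progress
// state stripped away. A rough field-by-field correspondence, based on the
// `Version` fields referenced elsewhere in this patch:
//
//   Version (region runtime state)           CompactionVersion (compaction jobs)
//   metadata                              ->  metadata
//   options                               ->  options
//   ssts                                  ->  ssts
//   compaction_time_window                ->  compaction_time_window
//   memtables                             ->  dropped (the point of this change)
//   flushed_entry_id / flushed_sequence   ->  dropped
//   truncated_entry_id                    ->  dropped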
+#[derive(Clone)] +pub struct CompactionVersion { + /// Metadata of the region. + /// + /// Altering metadata isn't frequent, storing metadata in Arc to allow sharing + /// metadata and reuse metadata when creating a new `Version`. + pub(crate) metadata: RegionMetadataRef, + /// Options of the region. + pub(crate) options: RegionOptions, + /// SSTs of the region. + pub(crate) ssts: SstVersionRef, + /// Inferred compaction time window. + pub(crate) compaction_time_window: Option, +} + +impl From for CompactionVersion { + fn from(value: VersionRef) -> Self { + Self { + metadata: value.metadata.clone(), + options: value.options.clone(), + ssts: value.ssts.clone(), + compaction_time_window: value.compaction_time_window, + } + } +} /// CompactionRegion represents a region that needs to be compacted. /// It's the subset of MitoRegion. @@ -62,7 +88,7 @@ pub struct CompactionRegion { pub(crate) cache_manager: CacheManagerRef, pub(crate) access_layer: AccessLayerRef, pub(crate) manifest_ctx: Arc, - pub(crate) current_version: VersionRef, + pub(crate) current_version: CompactionVersion, pub(crate) file_purger: Option>, pub(crate) ttl: Option, } @@ -147,30 +173,14 @@ pub async fn open_compaction_region( }; let current_version = { - let memtable_builder = MemtableBuilderProvider::new(None, Arc::new(mito_config.clone())) - .builder_for_options( - req.region_options.memtable.as_ref(), - req.region_options.need_dedup(), - req.region_options.merge_mode(), - ); - - // Initial memtable id is 0. - let mutable = Arc::new(TimePartitions::new( - region_metadata.clone(), - memtable_builder.clone(), - 0, - req.region_options.compaction.time_window(), - )); - - let version = VersionBuilder::new(region_metadata.clone(), mutable) - .add_files(file_purger.clone(), manifest.files.values().cloned()) - .flushed_entry_id(manifest.flushed_entry_id) - .flushed_sequence(manifest.flushed_sequence) - .truncated_entry_id(manifest.truncated_entry_id) - .compaction_time_window(manifest.compaction_time_window) - .options(req.region_options.clone()) - .build(); - Arc::new(version) + let mut ssts = SstVersion::new(); + ssts.add_files(file_purger.clone(), manifest.files.values().cloned()); + CompactionVersion { + metadata: region_metadata.clone(), + options: req.region_options.clone(), + ssts: Arc::new(ssts), + compaction_time_window: manifest.compaction_time_window, + } }; let ttl = find_ttl( diff --git a/src/mito2/src/compaction/window.rs b/src/mito2/src/compaction/window.rs index f16b8e4c95d3..10bdb47297d5 100644 --- a/src/mito2/src/compaction/window.rs +++ b/src/mito2/src/compaction/window.rs @@ -23,10 +23,9 @@ use common_time::Timestamp; use store_api::storage::RegionId; use crate::compaction::buckets::infer_time_bucket; -use crate::compaction::compactor::CompactionRegion; +use crate::compaction::compactor::{CompactionRegion, CompactionVersion}; use crate::compaction::picker::{Picker, PickerOutput}; use crate::compaction::{get_expired_ssts, CompactionOutput}; -use crate::region::version::VersionRef; use crate::sst::file::{FileHandle, FileId}; /// Compaction picker that splits the time range of all involved files to windows, and merges @@ -48,7 +47,11 @@ impl WindowedCompactionPicker { // use persisted window. If persist window is not present, we check the time window // provided while creating table. If all of those are absent, we infer the window // from files in level0. 
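// Condensed, the fallback described in the comment above looks roughly like the
// helper below. Treat it as a sketch: the precedence of the persisted window
// versus the table-option window is taken from the comment, not the full body.
fn effective_time_window(
    request_window_secs: Option<i64>,
    persisted_secs: Option<i64>,
    option_secs: Option<i64>,
    infer_from_level0: impl FnOnce() -> i64,
) -> i64 {
    request_window_secs
        .or(persisted_secs)
        .or(option_secs)
        .unwrap_or_else(infer_from_level0)
}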
- fn calculate_time_window(&self, region_id: RegionId, current_version: &VersionRef) -> i64 { + fn calculate_time_window( + &self, + region_id: RegionId, + current_version: &CompactionVersion, + ) -> i64 { self.compaction_time_window_seconds .or(current_version .compaction_time_window @@ -67,7 +70,7 @@ impl WindowedCompactionPicker { fn pick_inner( &self, region_id: RegionId, - current_version: &VersionRef, + current_version: &CompactionVersion, current_time: Timestamp, ) -> (Vec, Vec, i64) { let time_window = self.calculate_time_window(region_id, current_version); @@ -205,28 +208,19 @@ mod tests { use common_time::Timestamp; use store_api::storage::RegionId; + use crate::compaction::compactor::CompactionVersion; use crate::compaction::window::{file_time_bucket_span, WindowedCompactionPicker}; - use crate::memtable::partition_tree::{PartitionTreeConfig, PartitionTreeMemtableBuilder}; - use crate::memtable::time_partition::TimePartitions; - use crate::memtable::version::MemtableVersion; use crate::region::options::RegionOptions; - use crate::region::version::{Version, VersionRef}; use crate::sst::file::{FileId, FileMeta, Level}; use crate::sst::version::SstVersion; use crate::test_util::memtable_util::metadata_for_test; use crate::test_util::NoopFilePurger; - fn build_version(files: &[(FileId, i64, i64, Level)], ttl: Option) -> VersionRef { + fn build_version( + files: &[(FileId, i64, i64, Level)], + ttl: Option, + ) -> CompactionVersion { let metadata = metadata_for_test(); - let memtables = Arc::new(MemtableVersion::new(Arc::new(TimePartitions::new( - metadata.clone(), - Arc::new(PartitionTreeMemtableBuilder::new( - PartitionTreeConfig::default(), - None, - )), - 0, - None, - )))); let file_purger_ref = Arc::new(NoopFilePurger); let mut ssts = SstVersion::new(); @@ -244,14 +238,9 @@ mod tests { }), ); - Arc::new(Version { + CompactionVersion { metadata, - memtables, ssts: Arc::new(ssts), - flushed_entry_id: 0, - flushed_sequence: 0, - truncated_entry_id: None, - compaction_time_window: None, options: RegionOptions { ttl: ttl.map(|t| t.into()), compaction: Default::default(), @@ -262,7 +251,8 @@ mod tests { memtable: None, merge_mode: None, }, - }) + compaction_time_window: None, + } } #[test] diff --git a/src/mito2/src/flush.rs b/src/mito2/src/flush.rs index 09f45ca4f724..b522f225f9f0 100644 --- a/src/mito2/src/flush.rs +++ b/src/mito2/src/flush.rs @@ -18,7 +18,7 @@ use std::collections::HashMap; use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; -use common_telemetry::{debug, error, info}; +use common_telemetry::{debug, error, info, trace}; use smallvec::SmallVec; use snafu::ResultExt; use store_api::storage::RegionId; @@ -141,17 +141,22 @@ impl WriteBufferManager for WriteBufferManagerImpl { // If the memory exceeds the buffer size, we trigger more aggressive // flush. But if already more than half memory is being flushed, // triggering more flush may not help. We will hold it instead. 
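// Logging aside, the over-limit branch below implements this predicate: flush
// when the total write-buffer memory exceeds the global limit AND at least half
// of that limit is still held by mutable memtables; otherwise triggering more
// flushes would not release enough memory to help.
fn should_flush_over_limit(
    memory_usage: usize,
    mutable_memtable_memory_usage: usize,
    global_write_buffer_size: usize,
) -> bool {
    memory_usage >= global_write_buffer_size
        && mutable_memtable_memory_usage >= global_write_buffer_size / 2
}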
- if memory_usage >= self.global_write_buffer_size - && mutable_memtable_memory_usage >= self.global_write_buffer_size / 2 - { - debug!( + if memory_usage >= self.global_write_buffer_size { + if mutable_memtable_memory_usage >= self.global_write_buffer_size / 2 { + debug!( "Engine should flush (over total limit), memory_usage: {}, global_write_buffer_size: {}, \ mutable_usage: {}.", memory_usage, self.global_write_buffer_size, - mutable_memtable_memory_usage, - ); - return true; + mutable_memtable_memory_usage); + return true; + } else { + trace!( + "Engine won't flush, memory_usage: {}, global_write_buffer_size: {}, mutable_usage: {}.", + memory_usage, + self.global_write_buffer_size, + mutable_memtable_memory_usage); + } } false From 18e8c45384e506cdea9a3c4bddc010fca1e7f10d Mon Sep 17 00:00:00 2001 From: LFC <990479+MichaelScofield@users.noreply.github.com> Date: Wed, 18 Dec 2024 10:42:33 +0800 Subject: [PATCH 38/59] refactor: produce BatchBuilder from a Batch to modify it again (#5186) chore: pub some mods --- src/mito2/src/read.rs | 12 ++++++++++++ src/mito2/src/sst/parquet.rs | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/src/mito2/src/read.rs b/src/mito2/src/read.rs index d8ac5ce46b62..c4de103f1000 100644 --- a/src/mito2/src/read.rs +++ b/src/mito2/src/read.rs @@ -861,6 +861,18 @@ impl BatchBuilder { } } +impl From for BatchBuilder { + fn from(batch: Batch) -> Self { + Self { + primary_key: batch.primary_key, + timestamps: Some(batch.timestamps), + sequences: Some(batch.sequences), + op_types: Some(batch.op_types), + fields: batch.fields, + } + } +} + /// Async [Batch] reader and iterator wrapper. /// /// This is the data source for SST writers or internal readers. diff --git a/src/mito2/src/sst/parquet.rs b/src/mito2/src/sst/parquet.rs index ae51a0d37c29..3dd53ba645f2 100644 --- a/src/mito2/src/sst/parquet.rs +++ b/src/mito2/src/sst/parquet.rs @@ -24,7 +24,7 @@ use crate::sst::index::IndexOutput; use crate::sst::DEFAULT_WRITE_BUFFER_SIZE; pub(crate) mod file_range; -pub(crate) mod format; +pub mod format; pub(crate) mod helper; pub(crate) mod metadata; mod page_reader; From 7d1bcc9d494c2f3f0bedea23ffab331f8a8df88a Mon Sep 17 00:00:00 2001 From: Yohan Wal Date: Wed, 18 Dec 2024 11:45:38 +0800 Subject: [PATCH 39/59] feat: introduce Buffer for non-continuous bytes (#5164) * feat: introduce Buffer for non-continuous bytes * Update src/mito2/src/cache/index.rs Co-authored-by: Weny Xu * chore: apply review comments * refactor: use opendal::Buffer --------- Co-authored-by: Weny Xu --- Cargo.lock | 1 + src/common/base/Cargo.toml | 1 + src/index/src/inverted_index/format/reader.rs | 3 +- .../src/inverted_index/format/reader/blob.rs | 6 +- src/mito2/src/cache/index.rs | 99 ++++++++----------- 5 files changed, 46 insertions(+), 64 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index ea2931f09808..a0225cf27dbe 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1904,6 +1904,7 @@ dependencies = [ "futures", "paste", "pin-project", + "rand", "serde", "snafu 0.8.5", "tokio", diff --git a/src/common/base/Cargo.toml b/src/common/base/Cargo.toml index 465599974dae..2d35ad5d31ad 100644 --- a/src/common/base/Cargo.toml +++ b/src/common/base/Cargo.toml @@ -17,6 +17,7 @@ common-macro.workspace = true futures.workspace = true paste = "1.0" pin-project.workspace = true +rand.workspace = true serde = { version = "1.0", features = ["derive"] } snafu.workspace = true tokio.workspace = true diff --git a/src/index/src/inverted_index/format/reader.rs 
b/src/index/src/inverted_index/format/reader.rs index 904681d5f40a..21e5487d1e42 100644 --- a/src/index/src/inverted_index/format/reader.rs +++ b/src/index/src/inverted_index/format/reader.rs @@ -16,6 +16,7 @@ use std::ops::Range; use std::sync::Arc; use async_trait::async_trait; +use bytes::Bytes; use common_base::BitVec; use greptime_proto::v1::index::InvertedIndexMetas; use snafu::ResultExt; @@ -35,7 +36,7 @@ pub trait InvertedIndexReader: Send { async fn range_read(&mut self, offset: u64, size: u32) -> Result>; /// Reads the bytes in the given ranges. - async fn read_vec(&mut self, ranges: &[Range]) -> Result>>; + async fn read_vec(&mut self, ranges: &[Range]) -> Result>; /// Retrieves metadata of all inverted indices stored within the blob. async fn metadata(&mut self) -> Result>; diff --git a/src/index/src/inverted_index/format/reader/blob.rs b/src/index/src/inverted_index/format/reader/blob.rs index 371655d535f3..fcaa63773d93 100644 --- a/src/index/src/inverted_index/format/reader/blob.rs +++ b/src/index/src/inverted_index/format/reader/blob.rs @@ -16,6 +16,7 @@ use std::ops::Range; use std::sync::Arc; use async_trait::async_trait; +use bytes::Bytes; use common_base::range_read::RangeReader; use greptime_proto::v1::index::InvertedIndexMetas; use snafu::{ensure, ResultExt}; @@ -60,9 +61,8 @@ impl InvertedIndexReader for InvertedIndexBlobReader { Ok(buf.into()) } - async fn read_vec(&mut self, ranges: &[Range]) -> Result>> { - let bufs = self.source.read_vec(ranges).await.context(CommonIoSnafu)?; - Ok(bufs.into_iter().map(|buf| buf.into()).collect()) + async fn read_vec(&mut self, ranges: &[Range]) -> Result> { + self.source.read_vec(ranges).await.context(CommonIoSnafu) } async fn metadata(&mut self) -> Result> { diff --git a/src/mito2/src/cache/index.rs b/src/mito2/src/cache/index.rs index e25fb22dcbf5..de39ea3784b6 100644 --- a/src/mito2/src/cache/index.rs +++ b/src/mito2/src/cache/index.rs @@ -17,10 +17,12 @@ use std::sync::Arc; use api::v1::index::InvertedIndexMetas; use async_trait::async_trait; +use bytes::Bytes; use common_base::BitVec; use index::inverted_index::error::DecodeFstSnafu; use index::inverted_index::format::reader::InvertedIndexReader; use index::inverted_index::FstMap; +use object_store::Buffer; use prost::Message; use snafu::ResultExt; @@ -68,15 +70,14 @@ where if keys.is_empty() { return Ok(Vec::new()); } - // TODO: Can be replaced by an uncontinuous structure like opendal::Buffer. let mut data = Vec::with_capacity(keys.len()); - data.resize(keys.len(), Arc::new(Vec::new())); + data.resize(keys.len(), Bytes::new()); let mut cache_miss_range = vec![]; let mut cache_miss_idx = vec![]; let last_index = keys.len() - 1; // TODO: Avoid copy as much as possible. 
- for (i, index) in keys.clone().into_iter().enumerate() { - match self.cache.get_index(&index) { + for (i, index) in keys.iter().enumerate() { + match self.cache.get_index(index) { Some(page) => { CACHE_HIT.with_label_values(&[INDEX_CONTENT_TYPE]).inc(); data[i] = page; @@ -97,24 +98,19 @@ where if !cache_miss_range.is_empty() { let pages = self.inner.read_vec(&cache_miss_range).await?; for (i, page) in cache_miss_idx.into_iter().zip(pages.into_iter()) { - let page = Arc::new(page); let key = keys[i].clone(); data[i] = page.clone(); self.cache.put_index(key, page.clone()); } } - let mut result = Vec::with_capacity(size as usize); - data.iter().enumerate().for_each(|(i, page)| { - let range = if i == 0 { - IndexDataPageKey::calculate_first_page_range(offset, size, self.cache.page_size) - } else if i == last_index { - IndexDataPageKey::calculate_last_page_range(offset, size, self.cache.page_size) - } else { - 0..self.cache.page_size as usize - }; - result.extend_from_slice(&page[range]); - }); - Ok(result) + let buffer = Buffer::from_iter(data.into_iter()); + Ok(buffer + .slice(IndexDataPageKey::calculate_range( + offset, + size, + self.cache.page_size, + )) + .to_vec()) } } @@ -131,7 +127,7 @@ impl InvertedIndexReader for CachedInvertedIndexBlobRead async fn read_vec( &mut self, ranges: &[Range], - ) -> index::inverted_index::error::Result>> { + ) -> index::inverted_index::error::Result> { self.inner.read_vec(ranges).await } @@ -190,31 +186,19 @@ impl IndexDataPageKey { (end_page + 1 - start_page) as u32 } - /// Computes the byte range in the first page based on the offset and size. - /// For example, if offset is 1000 and size is 5000 with PAGE_SIZE of 4096, the first page range is 1000..4096. - fn calculate_first_page_range(offset: u64, size: u32, page_size: u64) -> Range { + /// Calculates the byte range for data retrieval based on the specified offset and size. + /// + /// This function determines the starting and ending byte positions required for reading data. + /// For example, with an offset of 5000 and a size of 5000, using a PAGE_SIZE of 4096, + /// the resulting byte range will be 904..5904. This indicates that: + /// - The reader will first access fixed-size pages [4096, 8192) and [8192, 12288). + /// - To read the range [5000..10000), it only needs to fetch bytes within the range [904, 5904) across two pages. + fn calculate_range(offset: u64, size: u32, page_size: u64) -> Range { let start = (offset % page_size) as usize; - let end = if size > page_size as u32 - start as u32 { - page_size as usize - } else { - start + size as usize - }; + let end = start + size as usize; start..end } - /// Computes the byte range in the last page based on the offset and size. - /// For example, if offset is 1000 and size is 5000 with PAGE_SIZE of 4096, the last page range is 0..1904. - fn calculate_last_page_range(offset: u64, size: u32, page_size: u64) -> Range { - let offset = offset as usize; - let size = size as usize; - let page_size = page_size as usize; - if (offset + size) % page_size == 0 { - 0..page_size - } else { - 0..((offset + size) % page_size) - } - } - /// Generates a vector of IndexKey instances for the pages that a given offset and size span. fn generate_page_keys(file_id: FileId, offset: u64, size: u32, page_size: u64) -> Vec { let start_page = Self::calculate_page_id(offset, page_size); @@ -234,7 +218,7 @@ pub struct InvertedIndexCache { /// Cache for inverted index metadata index_metadata: moka::sync::Cache>, /// Cache for inverted index content. 
- index: moka::sync::Cache>>, + index: moka::sync::Cache, // Page size for index content. page_size: u64, } @@ -284,11 +268,11 @@ impl InvertedIndexCache { self.index_metadata.insert(key, metadata) } - pub fn get_index(&self, key: &IndexDataPageKey) -> Option>> { + pub fn get_index(&self, key: &IndexDataPageKey) -> Option { self.index.get(key) } - pub fn put_index(&self, key: IndexDataPageKey, value: Arc>) { + pub fn put_index(&self, key: IndexDataPageKey, value: Bytes) { CACHE_BYTES .with_label_values(&[INDEX_CONTENT_TYPE]) .add(index_content_weight(&key, &value).into()); @@ -302,7 +286,7 @@ fn index_metadata_weight(k: &IndexMetadataKey, v: &Arc) -> u } /// Calculates weight for index content. -fn index_content_weight(k: &IndexDataPageKey, v: &Arc>) -> u32 { +fn index_content_weight(k: &IndexDataPageKey, v: &Bytes) -> u32 { (k.file_id.as_bytes().len() + v.len()) as u32 } @@ -331,6 +315,9 @@ mod test { use crate::sst::index::store::InstrumentedStore; use crate::test_util::TestEnv; + // Repeat times for following little fuzz tests. + const FUZZ_REPEAT_TIMES: usize = 100; + // Fuzz test for index data page key #[test] fn fuzz_index_calculation() { @@ -340,7 +327,7 @@ mod test { rng.fill_bytes(&mut data); let file_id = FileId::random(); - for _ in 0..100 { + for _ in 0..FUZZ_REPEAT_TIMES { let offset = rng.gen_range(0..data.len() as u64); let size = rng.gen_range(0..data.len() as u32 - offset as u32); let page_size: usize = rng.gen_range(1..1024); @@ -349,32 +336,24 @@ mod test { IndexDataPageKey::generate_page_keys(file_id, offset, size, page_size as u64); let page_num = indexes.len(); let mut read = Vec::with_capacity(size as usize); - let last_index = indexes.len() - 1; - for (i, key) in indexes.into_iter().enumerate() { + for key in indexes.into_iter() { let start = key.page_id as usize * page_size; let page = if start + page_size < data.len() { &data[start..start + page_size] } else { &data[start..] }; - let range = if i == 0 { - // first page range - IndexDataPageKey::calculate_first_page_range(offset, size, page_size as u64) - } else if i == last_index { - // last page range. when the first page is the last page, the range is not used. 
- IndexDataPageKey::calculate_last_page_range(offset, size, page_size as u64) - } else { - 0..page_size - }; - read.extend_from_slice(&page[range]); + read.extend_from_slice(page); } let expected_range = offset as usize..(offset + size as u64 as u64) as usize; + let read = + read[IndexDataPageKey::calculate_range(offset, size, page_size as u64)].to_vec(); if read != data.get(expected_range).unwrap() { panic!( - "fuzz_read_index failed, offset: {}, size: {}, page_size: {}\nread len: {}, expected len: {}\nfirst page range: {:?}, last page range: {:?}, page num: {}", + "fuzz_read_index failed, offset: {}, size: {}, page_size: {}\nread len: {}, expected len: {}\nrange: {:?}, page num: {}", offset, size, page_size, read.len(), size as usize, - IndexDataPageKey::calculate_first_page_range(offset, size, page_size as u64), - IndexDataPageKey::calculate_last_page_range(offset, size, page_size as u64), page_num + IndexDataPageKey::calculate_range(offset, size, page_size as u64), + page_num ); } } @@ -519,7 +498,7 @@ mod test { // fuzz test let mut rng = rand::thread_rng(); - for _ in 0..100 { + for _ in 0..FUZZ_REPEAT_TIMES { let offset = rng.gen_range(0..file_size); let size = rng.gen_range(0..file_size as u32 - offset as u32); let expected = cached_reader.range_read(offset, size).await.unwrap(); From 266919c226f4da5296c75797169843094b221f4f Mon Sep 17 00:00:00 2001 From: Lanqing Yang Date: Tue, 17 Dec 2024 22:10:59 -0800 Subject: [PATCH 40/59] fix: display inverted and fulltext index in show index (#5169) --- .../information_schema/key_column_usage.rs | 50 ++++++++++-------- src/datatypes/src/schema/column_schema.rs | 4 ++ src/query/src/sql.rs | 23 ++++++-- .../standalone/common/show/show_index.result | 52 +++++++++++-------- .../standalone/common/show/show_index.sql | 6 ++- 5 files changed, 87 insertions(+), 48 deletions(-) diff --git a/src/catalog/src/system_schema/information_schema/key_column_usage.rs b/src/catalog/src/system_schema/information_schema/key_column_usage.rs index 56713dabba28..42cfa53fdb38 100644 --- a/src/catalog/src/system_schema/information_schema/key_column_usage.rs +++ b/src/catalog/src/system_schema/information_schema/key_column_usage.rs @@ -54,6 +54,10 @@ const INIT_CAPACITY: usize = 42; pub(crate) const PRI_CONSTRAINT_NAME: &str = "PRIMARY"; /// Time index constraint name pub(crate) const TIME_INDEX_CONSTRAINT_NAME: &str = "TIME INDEX"; +/// Inverted index constraint name +pub(crate) const INVERTED_INDEX_CONSTRAINT_NAME: &str = "INVERTED INDEX"; +/// Fulltext index constraint name +pub(crate) const FULLTEXT_INDEX_CONSTRAINT_NAME: &str = "FULLTEXT INDEX"; /// The virtual table implementation for `information_schema.KEY_COLUMN_USAGE`. pub(super) struct InformationSchemaKeyColumnUsage { @@ -216,14 +220,13 @@ impl InformationSchemaKeyColumnUsageBuilder { let mut stream = catalog_manager.tables(&catalog_name, &schema_name, None); while let Some(table) = stream.try_next().await? 
{ - let mut primary_constraints = vec![]; - let table_info = table.table_info(); let table_name = &table_info.name; let keys = &table_info.meta.primary_key_indices; let schema = table.schema(); for (idx, column) in schema.column_schemas().iter().enumerate() { + let mut constraints = vec![]; if column.is_time_index() { self.add_key_column_usage( &predicates, @@ -236,30 +239,31 @@ impl InformationSchemaKeyColumnUsageBuilder { 1, //always 1 for time index ); } + // TODO(dimbtp): foreign key constraint not supported yet if keys.contains(&idx) { - primary_constraints.push(( - catalog_name.clone(), - schema_name.clone(), - table_name.to_string(), - column.name.clone(), - )); + constraints.push(PRI_CONSTRAINT_NAME); + } + if column.is_inverted_indexed() { + constraints.push(INVERTED_INDEX_CONSTRAINT_NAME); + } + + if column.has_fulltext_index_key() { + constraints.push(FULLTEXT_INDEX_CONSTRAINT_NAME); } - // TODO(dimbtp): foreign key constraint not supported yet - } - for (i, (catalog_name, schema_name, table_name, column_name)) in - primary_constraints.into_iter().enumerate() - { - self.add_key_column_usage( - &predicates, - &schema_name, - PRI_CONSTRAINT_NAME, - &catalog_name, - &schema_name, - &table_name, - &column_name, - i as u32 + 1, - ); + if !constraints.is_empty() { + let aggregated_constraints = constraints.join(", "); + self.add_key_column_usage( + &predicates, + &schema_name, + &aggregated_constraints, + &catalog_name, + &schema_name, + table_name, + &column.name, + idx as u32 + 1, + ); + } } } } diff --git a/src/datatypes/src/schema/column_schema.rs b/src/datatypes/src/schema/column_schema.rs index aee9efd9625d..7a96ab5e2bf2 100644 --- a/src/datatypes/src/schema/column_schema.rs +++ b/src/datatypes/src/schema/column_schema.rs @@ -164,6 +164,10 @@ impl ColumnSchema { .unwrap_or(false) } + pub fn has_fulltext_index_key(&self) -> bool { + self.metadata.contains_key(FULLTEXT_KEY) + } + pub fn has_inverted_index_key(&self) -> bool { self.metadata.contains_key(INVERTED_INDEX_KEY) } diff --git a/src/query/src/sql.rs b/src/query/src/sql.rs index 062bd8e14e18..3337503d097c 100644 --- a/src/query/src/sql.rs +++ b/src/query/src/sql.rs @@ -40,7 +40,7 @@ use common_recordbatch::RecordBatches; use common_time::timezone::get_timezone; use common_time::Timestamp; use datafusion::common::ScalarValue; -use datafusion::prelude::SessionContext; +use datafusion::prelude::{concat_ws, SessionContext}; use datafusion_expr::{case, col, lit, Expr}; use datatypes::prelude::*; use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, RawSchema, Schema}; @@ -400,6 +400,20 @@ pub async fn show_index( query_ctx.current_schema() }; + let fulltext_index_expr = case(col("constraint_name").like(lit("%FULLTEXT INDEX%"))) + .when(lit(true), lit("greptime-fulltext-index-v1")) + .otherwise(null()) + .context(error::PlanSqlSnafu)?; + + let inverted_index_expr = case( + col("constraint_name") + .like(lit("%INVERTED INDEX%")) + .or(col("constraint_name").like(lit("%PRIMARY%"))), + ) + .when(lit(true), lit("greptime-inverted-index-v1")) + .otherwise(null()) + .context(error::PlanSqlSnafu)?; + let select = vec![ // 1 as `Non_unique`: contain duplicates lit(1).alias(INDEX_NONT_UNIQUE_COLUMN), @@ -417,8 +431,11 @@ pub async fn show_index( .otherwise(lit(YES_STR)) .context(error::PlanSqlSnafu)? .alias(COLUMN_NULLABLE_COLUMN), - // TODO(dennis): maybe 'BTREE'? 
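// The two case/when expressions built above feed the `Index_type` column:
// the inverted variant matches constraint names containing "INVERTED INDEX"
// or "PRIMARY", the fulltext variant matches "FULLTEXT INDEX", and the two
// results are joined with ", ". A rough standalone model of that mapping
// (hypothetical helper using plain string matching in place of the
// DataFusion expressions):

fn index_type(constraint_name: &str) -> String {
    let mut parts = Vec::new();
    if constraint_name.contains("INVERTED INDEX") || constraint_name.contains("PRIMARY") {
        parts.push("greptime-inverted-index-v1");
    }
    if constraint_name.contains("FULLTEXT INDEX") {
        parts.push("greptime-fulltext-index-v1");
    }
    // e.g. "PRIMARY, INVERTED INDEX, FULLTEXT INDEX" maps to
    // "greptime-inverted-index-v1, greptime-fulltext-index-v1", while
    // "TIME INDEX" maps to an empty string.
    parts.join(", ")
}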
- lit("greptime-inverted-index-v1").alias(INDEX_INDEX_TYPE_COLUMN), + concat_ws( + lit(", "), + vec![inverted_index_expr.clone(), fulltext_index_expr.clone()], + ) + .alias(INDEX_INDEX_TYPE_COLUMN), lit("").alias(COLUMN_COMMENT_COLUMN), lit("").alias(INDEX_COMMENT_COLUMN), lit(YES_STR).alias(INDEX_VISIBLE_COLUMN), diff --git a/tests/cases/standalone/common/show/show_index.result b/tests/cases/standalone/common/show/show_index.result index 995da87c133d..6f179687dbb5 100644 --- a/tests/cases/standalone/common/show/show_index.result +++ b/tests/cases/standalone/common/show/show_index.result @@ -1,11 +1,15 @@ CREATE TABLE IF NOT EXISTS system_metrics ( host STRING, - idc STRING, + idc STRING FULLTEXT, cpu_util DOUBLE, memory_util DOUBLE, disk_util DOUBLE, + desc1 STRING, + desc2 STRING FULLTEXT, + desc3 STRING FULLTEXT, ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY(host, idc), + INVERTED INDEX(idc, desc1, desc2), TIME INDEX(ts) ); @@ -33,28 +37,34 @@ SHOW INDEX FROM test; +-------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ | test | 1 | PRIMARY | 1 | a | A | | | | YES | greptime-inverted-index-v1 | | | YES | | | test | 1 | PRIMARY | 2 | b | A | | | | YES | greptime-inverted-index-v1 | | | YES | | -| test | 1 | TIME INDEX | 1 | ts | A | | | | NO | greptime-inverted-index-v1 | | | YES | | +| test | 1 | TIME INDEX | 1 | ts | A | | | | NO | | | | YES | | +-------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ SHOW INDEX FROM system_metrics; -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ -| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ -| system_metrics | 1 | PRIMARY | 1 | host | A | | | | YES | greptime-inverted-index-v1 | | | YES | | -| system_metrics | 1 | PRIMARY | 2 | idc | A | | | | YES | greptime-inverted-index-v1 | | | YES | | -| system_metrics | 1 | TIME INDEX | 1 | ts | A | | | | NO | greptime-inverted-index-v1 | | | YES | | -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ ++----------------+------------+-----------------------------------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------------------------------------+---------+---------------+---------+------------+ +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | ++----------------+------------+-----------------------------------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------------------------------------+---------+---------------+---------+------------+ 
+| system_metrics | 1 | INVERTED INDEX | 6 | desc1 | A | | | | YES | greptime-inverted-index-v1 | | | YES | | +| system_metrics | 1 | INVERTED INDEX, FULLTEXT INDEX | 7 | desc2 | A | | | | YES | greptime-inverted-index-v1, greptime-fulltext-index-v1 | | | YES | | +| system_metrics | 1 | FULLTEXT INDEX | 8 | desc3 | A | | | | YES | greptime-fulltext-index-v1 | | | YES | | +| system_metrics | 1 | PRIMARY | 1 | host | A | | | | YES | greptime-inverted-index-v1 | | | YES | | +| system_metrics | 1 | PRIMARY, INVERTED INDEX, FULLTEXT INDEX | 2 | idc | A | | | | YES | greptime-inverted-index-v1, greptime-fulltext-index-v1 | | | YES | | +| system_metrics | 1 | TIME INDEX | 1 | ts | A | | | | NO | | | | YES | | ++----------------+------------+-----------------------------------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------------------------------------+---------+---------------+---------+------------+ SHOW INDEX FROM system_metrics in public; -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ -| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ -| system_metrics | 1 | PRIMARY | 1 | host | A | | | | YES | greptime-inverted-index-v1 | | | YES | | -| system_metrics | 1 | PRIMARY | 2 | idc | A | | | | YES | greptime-inverted-index-v1 | | | YES | | -| system_metrics | 1 | TIME INDEX | 1 | ts | A | | | | NO | greptime-inverted-index-v1 | | | YES | | -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ ++----------------+------------+-----------------------------------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------------------------------------+---------+---------------+---------+------------+ +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | ++----------------+------------+-----------------------------------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------------------------------------+---------+---------------+---------+------------+ +| system_metrics | 1 | INVERTED INDEX | 6 | desc1 | A | | | | YES | greptime-inverted-index-v1 | | | YES | | +| system_metrics | 1 | INVERTED INDEX, FULLTEXT INDEX | 7 | desc2 | A | | | | YES | greptime-inverted-index-v1, greptime-fulltext-index-v1 | | | YES | | +| system_metrics | 1 | FULLTEXT INDEX | 8 | desc3 | A | | | | YES | greptime-fulltext-index-v1 | | | YES | | +| system_metrics | 1 | PRIMARY | 1 | host | A | | | | YES | greptime-inverted-index-v1 | | | YES | | +| system_metrics | 1 | PRIMARY, INVERTED INDEX, FULLTEXT INDEX | 2 | idc | A | | | | YES | greptime-inverted-index-v1, greptime-fulltext-index-v1 | | | YES | | +| system_metrics | 1 | TIME INDEX | 1 | ts | A | | | | NO | | | | YES | | 
++----------------+------------+-----------------------------------------+--------------+-------------+-----------+-------------+----------+--------+------+--------------------------------------------------------+---------+---------------+---------+------------+ SHOW INDEX FROM system_metrics like '%util%'; @@ -62,11 +72,11 @@ Error: 1001(Unsupported), SQL statement is not supported, keyword: like SHOW INDEX FROM system_metrics WHERE Key_name = 'TIME INDEX'; -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ -| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ -| system_metrics | 1 | TIME INDEX | 1 | ts | A | | | | NO | greptime-inverted-index-v1 | | | YES | | -+----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+----------------------------+---------+---------------+---------+------------+ ++----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+ +| Table | Non_unique | Key_name | Seq_in_index | Column_name | Collation | Cardinality | Sub_part | Packed | Null | Index_type | Comment | Index_comment | Visible | Expression | ++----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+ +| system_metrics | 1 | TIME INDEX | 1 | ts | A | | | | NO | | | | YES | | ++----------------+------------+------------+--------------+-------------+-----------+-------------+----------+--------+------+------------+---------+---------------+---------+------------+ DROP TABLE system_metrics; diff --git a/tests/cases/standalone/common/show/show_index.sql b/tests/cases/standalone/common/show/show_index.sql index 3f804db3845f..f0c5894a0ad7 100644 --- a/tests/cases/standalone/common/show/show_index.sql +++ b/tests/cases/standalone/common/show/show_index.sql @@ -1,11 +1,15 @@ CREATE TABLE IF NOT EXISTS system_metrics ( host STRING, - idc STRING, + idc STRING FULLTEXT, cpu_util DOUBLE, memory_util DOUBLE, disk_util DOUBLE, + desc1 STRING, + desc2 STRING FULLTEXT, + desc3 STRING FULLTEXT, ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP, PRIMARY KEY(host, idc), + INVERTED INDEX(idc, desc1, desc2), TIME INDEX(ts) ); From e662c241e62dcd88b5a51e28fbea969d2d7b5fb8 Mon Sep 17 00:00:00 2001 From: dennis zhuang Date: Wed, 18 Dec 2024 14:35:45 +0800 Subject: [PATCH 41/59] feat: impl label_join and label_replace for promql (#5153) * feat: impl label_join and label_replace for promql * chore: style * fix: dst_label is eqauls to src_label * fix: forgot to sort the results * fix: processing empty source label --- src/query/src/promql/planner.rs | 258 +++++++++++++++++- .../standalone/common/promql/label.result | 199 ++++++++++++++ .../cases/standalone/common/promql/label.sql | 55 ++++ 3 files changed, 508 insertions(+), 4 deletions(-) create mode 100644 tests/cases/standalone/common/promql/label.result create mode 100644 
tests/cases/standalone/common/promql/label.sql diff --git a/src/query/src/promql/planner.rs b/src/query/src/promql/planner.rs index 001e41ca9934..1e7bc27dab6a 100644 --- a/src/query/src/promql/planner.rs +++ b/src/query/src/promql/planner.rs @@ -1213,7 +1213,7 @@ impl PromPlanner { let quantile_expr = match other_input_exprs.pop_front() { Some(DfExpr::Literal(ScalarValue::Float64(Some(quantile)))) => quantile, other => UnexpectedPlanExprSnafu { - desc: format!("expect f64 literal as quantile, but found {:?}", other), + desc: format!("expected f64 literal as quantile, but found {:?}", other), } .fail()?, }; @@ -1224,7 +1224,7 @@ impl PromPlanner { Some(DfExpr::Literal(ScalarValue::Float64(Some(t)))) => t as i64, Some(DfExpr::Literal(ScalarValue::Int64(Some(t)))) => t, other => UnexpectedPlanExprSnafu { - desc: format!("expect i64 literal as t, but found {:?}", other), + desc: format!("expected i64 literal as t, but found {:?}", other), } .fail()?, }; @@ -1235,7 +1235,7 @@ impl PromPlanner { Some(DfExpr::Literal(ScalarValue::Float64(Some(sf)))) => sf, other => UnexpectedPlanExprSnafu { desc: format!( - "expect f64 literal as smoothing factor, but found {:?}", + "expected f64 literal as smoothing factor, but found {:?}", other ), } @@ -1244,7 +1244,10 @@ impl PromPlanner { let tf_exp = match other_input_exprs.pop_front() { Some(DfExpr::Literal(ScalarValue::Float64(Some(tf)))) => tf, other => UnexpectedPlanExprSnafu { - desc: format!("expect f64 literal as trend factor, but found {:?}", other), + desc: format!( + "expected f64 literal as trend factor, but found {:?}", + other + ), } .fail()?, }; @@ -1331,6 +1334,47 @@ impl PromPlanner { exprs.push(date_part_expr); ScalarFunc::GeneratedExpr } + + "label_join" => { + let (concat_expr, dst_label) = + Self::build_concat_labels_expr(&mut other_input_exprs, session_state)?; + + // Reserve the current field columns except the `dst_label`. + for value in &self.ctx.field_columns { + if *value != dst_label { + let expr = DfExpr::Column(Column::from_name(value)); + exprs.push(expr); + } + } + + // Remove it from tag columns + self.ctx.tag_columns.retain(|tag| *tag != dst_label); + + // Add the new label expr + exprs.push(concat_expr); + + ScalarFunc::GeneratedExpr + } + "label_replace" => { + let (replace_expr, dst_label) = + Self::build_regexp_replace_label_expr(&mut other_input_exprs, session_state)?; + + // Reserve the current field columns except the `dst_label`. 
+ for value in &self.ctx.field_columns { + if *value != dst_label { + let expr = DfExpr::Column(Column::from_name(value)); + exprs.push(expr); + } + } + + // Remove it from tag columns + self.ctx.tag_columns.retain(|tag| *tag != dst_label); + + // Add the new label expr + exprs.push(replace_expr); + + ScalarFunc::GeneratedExpr + } _ => { if let Some(f) = session_state.scalar_functions().get(func.name) { ScalarFunc::DataFusionBuiltin(f.clone()) @@ -1411,6 +1455,7 @@ impl PromPlanner { // update value columns' name, and alias them to remove qualifiers let mut new_field_columns = Vec::with_capacity(exprs.len()); + exprs = exprs .into_iter() .map(|expr| { @@ -1420,11 +1465,146 @@ impl PromPlanner { }) .collect::, _>>() .context(DataFusionPlanningSnafu)?; + self.ctx.field_columns = new_field_columns; Ok(exprs) } + /// Build expr for `label_replace` function + fn build_regexp_replace_label_expr( + other_input_exprs: &mut VecDeque, + session_state: &SessionState, + ) -> Result<(DfExpr, String)> { + // label_replace(vector, dst_label, replacement, src_label, regex) + let dst_label = match other_input_exprs.pop_front() { + Some(DfExpr::Literal(ScalarValue::Utf8(Some(d)))) => d, + other => UnexpectedPlanExprSnafu { + desc: format!("expected dst_label string literal, but found {:?}", other), + } + .fail()?, + }; + let replacement = match other_input_exprs.pop_front() { + Some(DfExpr::Literal(ScalarValue::Utf8(Some(r)))) => r, + other => UnexpectedPlanExprSnafu { + desc: format!("expected replacement string literal, but found {:?}", other), + } + .fail()?, + }; + let src_label = match other_input_exprs.pop_front() { + Some(DfExpr::Literal(ScalarValue::Utf8(Some(s)))) => s, + other => UnexpectedPlanExprSnafu { + desc: format!("expected src_label string literal, but found {:?}", other), + } + .fail()?, + }; + let regex = match other_input_exprs.pop_front() { + Some(DfExpr::Literal(ScalarValue::Utf8(Some(r)))) => r, + other => UnexpectedPlanExprSnafu { + desc: format!("expected regex string literal, but found {:?}", other), + } + .fail()?, + }; + + let func = session_state + .scalar_functions() + .get("regexp_replace") + .context(UnsupportedExprSnafu { + name: "regexp_replace", + })?; + + // regexp_replace(src_label, regex, replacement) + let args = vec![ + if src_label.is_empty() { + DfExpr::Literal(ScalarValue::Null) + } else { + DfExpr::Column(Column::from_name(src_label)) + }, + DfExpr::Literal(ScalarValue::Utf8(Some(regex))), + DfExpr::Literal(ScalarValue::Utf8(Some(replacement))), + ]; + + Ok(( + DfExpr::ScalarFunction(ScalarFunction { + func: func.clone(), + args, + }) + .alias(&dst_label), + dst_label, + )) + } + + /// Build expr for `label_join` function + fn build_concat_labels_expr( + other_input_exprs: &mut VecDeque, + session_state: &SessionState, + ) -> Result<(DfExpr, String)> { + // label_join(vector, dst_label, separator, src_label_1, src_label_2, ...) 
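// Both label helpers lower onto ordinary scalar expressions:
// label_replace(v, dst, replacement, src, regex) becomes
// regexp_replace(src, regex, replacement) AS dst, and
// label_join(v, dst, separator, src_1, src_2, ...) becomes
// concat_ws(separator, src_1, src_2, ...) AS dst.
// A small standalone sketch of the per-row effect, assuming the `regex`
// crate stands in for DataFusion's regexp_replace:

use regex::Regex;

fn main() {
    // label_replace(test, "new_idc", "$2", "idc", "(.*):(.*)") on idc = "idc2:zone1"
    let re = Regex::new("(.*):(.*)").unwrap();
    assert_eq!(re.replace("idc2:zone1", "$2"), "zone1");
    // A value without a match is passed through unchanged.
    assert_eq!(re.replace("idc1", "$2"), "idc1");

    // label_join(test, "new_host", "-", "idc", "host") on idc = "idc1", host = "host1"
    let new_host = ["idc1", "host1"].join("-");
    assert_eq!(new_host, "idc1-host1");
}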
+ + let dst_label = match other_input_exprs.pop_front() { + Some(DfExpr::Literal(ScalarValue::Utf8(Some(d)))) => d, + other => UnexpectedPlanExprSnafu { + desc: format!("expected dst_label string literal, but found {:?}", other), + } + .fail()?, + }; + let separator = match other_input_exprs.pop_front() { + Some(DfExpr::Literal(ScalarValue::Utf8(Some(d)))) => d, + other => UnexpectedPlanExprSnafu { + desc: format!("expected separator string literal, but found {:?}", other), + } + .fail()?, + }; + let src_labels = other_input_exprs + .clone() + .into_iter() + .map(|expr| { + // Cast source label into column + match expr { + DfExpr::Literal(ScalarValue::Utf8(Some(label))) => { + if label.is_empty() { + Ok(DfExpr::Literal(ScalarValue::Null)) + } else { + Ok(DfExpr::Column(Column::from_name(label))) + } + } + other => UnexpectedPlanExprSnafu { + desc: format!( + "expected source label string literal, but found {:?}", + other + ), + } + .fail(), + } + }) + .collect::>>()?; + ensure!( + !src_labels.is_empty(), + FunctionInvalidArgumentSnafu { + fn_name: "label_join", + } + ); + + let func = session_state + .scalar_functions() + .get("concat_ws") + .context(UnsupportedExprSnafu { name: "concat_ws" })?; + + // concat_ws(separator, src_label_1, src_label_2, ...) as dst_label + let mut args = Vec::with_capacity(1 + src_labels.len()); + args.push(DfExpr::Literal(ScalarValue::Utf8(Some(separator)))); + args.extend(src_labels); + + Ok(( + DfExpr::ScalarFunction(ScalarFunction { + func: func.clone(), + args, + }) + .alias(&dst_label), + dst_label, + )) + } + fn create_time_index_column_expr(&self) -> Result { Ok(DfExpr::Column(Column::from_name( self.ctx @@ -3267,4 +3447,74 @@ mod test { \n TableScan: metrics [tag:Utf8, timestamp:Timestamp(Nanosecond, None), field:Float64;N]" ); } + + #[tokio::test] + async fn test_label_join() { + let prom_expr = parser::parse( + "label_join(up{tag_0='api-server'}, 'foo', ',', 'tag_1', 'tag_2', 'tag_3')", + ) + .unwrap(); + let eval_stmt = EvalStmt { + expr: prom_expr, + start: UNIX_EPOCH, + end: UNIX_EPOCH + .checked_add(Duration::from_secs(100_000)) + .unwrap(), + interval: Duration::from_secs(5), + lookback_delta: Duration::from_secs(1), + }; + + let table_provider = + build_test_table_provider(&[(DEFAULT_SCHEMA_NAME.to_string(), "up".to_string())], 4, 1) + .await; + let plan = PromPlanner::stmt_to_plan(table_provider, &eval_stmt, &build_session_state()) + .await + .unwrap(); + + let expected = r#"Filter: field_0 IS NOT NULL AND foo IS NOT NULL [timestamp:Timestamp(Millisecond, None), field_0:Float64;N, foo:Utf8;N, tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8] + Projection: up.timestamp, up.field_0 AS field_0, concat_ws(Utf8(","), up.tag_1, up.tag_2, up.tag_3) AS foo AS foo, up.tag_0, up.tag_1, up.tag_2, up.tag_3 [timestamp:Timestamp(Millisecond, None), field_0:Float64;N, foo:Utf8;N, tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8] + PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[timestamp] [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [false] [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + PromSeriesDivide: tags=["tag_0", "tag_1", "tag_2", "tag_3"] [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + Sort: up.tag_0 DESC NULLS LAST, up.tag_1 DESC NULLS LAST, up.tag_2 DESC 
NULLS LAST, up.tag_3 DESC NULLS LAST, up.timestamp DESC NULLS LAST [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + Filter: up.tag_0 = Utf8("api-server") AND up.timestamp >= TimestampMillisecond(-1000, None) AND up.timestamp <= TimestampMillisecond(100001000, None) [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + TableScan: up [tag_0:Utf8, tag_1:Utf8, tag_2:Utf8, tag_3:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]"#; + + assert_eq!(plan.display_indent_schema().to_string(), expected); + } + + #[tokio::test] + async fn test_label_replace() { + let prom_expr = parser::parse( + "label_replace(up{tag_0=\"a:c\"}, \"foo\", \"$1\", \"tag_0\", \"(.*):.*\")", + ) + .unwrap(); + let eval_stmt = EvalStmt { + expr: prom_expr, + start: UNIX_EPOCH, + end: UNIX_EPOCH + .checked_add(Duration::from_secs(100_000)) + .unwrap(), + interval: Duration::from_secs(5), + lookback_delta: Duration::from_secs(1), + }; + + let table_provider = + build_test_table_provider(&[(DEFAULT_SCHEMA_NAME.to_string(), "up".to_string())], 1, 1) + .await; + let plan = PromPlanner::stmt_to_plan(table_provider, &eval_stmt, &build_session_state()) + .await + .unwrap(); + + let expected = r#"Filter: field_0 IS NOT NULL AND foo IS NOT NULL [timestamp:Timestamp(Millisecond, None), field_0:Float64;N, foo:Utf8;N, tag_0:Utf8] + Projection: up.timestamp, up.field_0 AS field_0, regexp_replace(up.tag_0, Utf8("(.*):.*"), Utf8("$1")) AS foo AS foo, up.tag_0 [timestamp:Timestamp(Millisecond, None), field_0:Float64;N, foo:Utf8;N, tag_0:Utf8] + PromInstantManipulate: range=[0..100000000], lookback=[1000], interval=[5000], time index=[timestamp] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + PromSeriesNormalize: offset=[0], time index=[timestamp], filter NaN: [false] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + PromSeriesDivide: tags=["tag_0"] [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + Sort: up.tag_0 DESC NULLS LAST, up.timestamp DESC NULLS LAST [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + Filter: up.tag_0 = Utf8("a:c") AND up.timestamp >= TimestampMillisecond(-1000, None) AND up.timestamp <= TimestampMillisecond(100001000, None) [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N] + TableScan: up [tag_0:Utf8, timestamp:Timestamp(Millisecond, None), field_0:Float64;N]"#; + + assert_eq!(plan.display_indent_schema().to_string(), expected); + } } diff --git a/tests/cases/standalone/common/promql/label.result b/tests/cases/standalone/common/promql/label.result new file mode 100644 index 000000000000..42ba33ca9253 --- /dev/null +++ b/tests/cases/standalone/common/promql/label.result @@ -0,0 +1,199 @@ +CREATE TABLE test ( + ts timestamp(3) time index, + host STRING, + idc STRING, + val BIGINT, + PRIMARY KEY(host, idc), +); + +Affected Rows: 0 + +INSERT INTO TABLE test VALUES + (0, 'host1', 'idc1', 1), + (0, 'host2', 'idc1', 2), + (5000, 'host1', 'idc2:zone1',3), + (5000, 'host2', 'idc2',4), + (10000, 'host1', 'idc3:zone2',5), + (10000, 'host2', 'idc3',6), + (15000, 'host1', 'idc4:zone3',7), + (15000, 'host2', 'idc4',8); + +Affected Rows: 8 + +-- Missing source labels -- +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "new_host", "-"); + +Error: 1004(InvalidArguments), Invalid function argument for label_join + +-- dst_label is equal to source label -- +-- SQLNESS SORT_RESULT 3 1 
+TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "host", "-", "host"); + ++---------------------+-----+-------+------------+ +| ts | val | host | idc | ++---------------------+-----+-------+------------+ +| 1970-01-01T00:00:00 | 1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 3 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 1 | host1 | idc1 | +| 1970-01-01T00:00:10 | 3 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 5 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 1 | host1 | idc1 | +| 1970-01-01T00:00:15 | 3 | host1 | idc2:zone1 | +| 1970-01-01T00:00:15 | 5 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 7 | host1 | idc4:zone3 | ++---------------------+-----+-------+------------+ + +-- dst_label is in source labels -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "host", "-", "idc", "host"); + ++---------------------+-----+------------------+------------+ +| ts | val | host | idc | ++---------------------+-----+------------------+------------+ +| 1970-01-01T00:00:00 | 1 | idc1-host1 | idc1 | +| 1970-01-01T00:00:05 | 1 | idc1-host1 | idc1 | +| 1970-01-01T00:00:05 | 3 | idc2:zone1-host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 1 | idc1-host1 | idc1 | +| 1970-01-01T00:00:10 | 3 | idc2:zone1-host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 5 | idc3:zone2-host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 1 | idc1-host1 | idc1 | +| 1970-01-01T00:00:15 | 3 | idc2:zone1-host1 | idc2:zone1 | +| 1970-01-01T00:00:15 | 5 | idc3:zone2-host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 7 | idc4:zone3-host1 | idc4:zone3 | ++---------------------+-----+------------------+------------+ + +-- test the empty source label -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "host", "-", ""); + ++---------------------+-----+------+------------+ +| ts | val | host | idc | ++---------------------+-----+------+------------+ +| 1970-01-01T00:00:00 | 1 | | idc1 | +| 1970-01-01T00:00:05 | 1 | | idc1 | +| 1970-01-01T00:00:05 | 3 | | idc2:zone1 | +| 1970-01-01T00:00:10 | 1 | | idc1 | +| 1970-01-01T00:00:10 | 3 | | idc2:zone1 | +| 1970-01-01T00:00:10 | 5 | | idc3:zone2 | +| 1970-01-01T00:00:15 | 1 | | idc1 | +| 1970-01-01T00:00:15 | 3 | | idc2:zone1 | +| 1970-01-01T00:00:15 | 5 | | idc3:zone2 | +| 1970-01-01T00:00:15 | 7 | | idc4:zone3 | ++---------------------+-----+------+------------+ + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "new_host", "-", "idc", "host"); + ++---------------------+-----+------------------+-------+------------+ +| ts | val | new_host | host | idc | ++---------------------+-----+------------------+-------+------------+ +| 1970-01-01T00:00:00 | 1 | idc1-host1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 1 | idc1-host1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 3 | idc2:zone1-host1 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 1 | idc1-host1 | host1 | idc1 | +| 1970-01-01T00:00:10 | 3 | idc2:zone1-host1 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 5 | idc3:zone2-host1 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 1 | idc1-host1 | host1 | idc1 | +| 1970-01-01T00:00:15 | 3 | idc2:zone1-host1 | host1 | idc2:zone1 | +| 1970-01-01T00:00:15 | 5 | idc3:zone2-host1 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 7 | idc4:zone3-host1 | host1 | idc4:zone3 | ++---------------------+-----+------------------+-------+------------+ + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host1"}, "new_idc", "$2", "idc", "(.*):(.*)"); + 
++---------------------+-----+---------+-------+------------+ +| ts | val | new_idc | host | idc | ++---------------------+-----+---------+-------+------------+ +| 1970-01-01T00:00:00 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 3 | zone1 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:10 | 3 | zone1 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 5 | zone2 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:15 | 3 | zone1 | host1 | idc2:zone1 | +| 1970-01-01T00:00:15 | 5 | zone2 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 7 | zone3 | host1 | idc4:zone3 | ++---------------------+-----+---------+-------+------------+ + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host1"}, "new_idc", "idc99", "idc", "idc2.*"); + ++---------------------+-----+------------+-------+------------+ +| ts | val | new_idc | host | idc | ++---------------------+-----+------------+-------+------------+ +| 1970-01-01T00:00:00 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:05 | 3 | idc99 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:10 | 3 | idc99 | host1 | idc2:zone1 | +| 1970-01-01T00:00:10 | 5 | idc3:zone2 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 1 | idc1 | host1 | idc1 | +| 1970-01-01T00:00:15 | 3 | idc99 | host1 | idc2:zone1 | +| 1970-01-01T00:00:15 | 5 | idc3:zone2 | host1 | idc3:zone2 | +| 1970-01-01T00:00:15 | 7 | idc4:zone3 | host1 | idc4:zone3 | ++---------------------+-----+------------+-------+------------+ + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "new_idc", "$2", "idc", "(.*):(.*)"); + ++---------------------+-----+---------+-------+------+ +| ts | val | new_idc | host | idc | ++---------------------+-----+---------+-------+------+ +| 1970-01-01T00:00:00 | 2 | idc1 | host2 | idc1 | +| 1970-01-01T00:00:05 | 2 | idc1 | host2 | idc1 | +| 1970-01-01T00:00:05 | 4 | idc2 | host2 | idc2 | +| 1970-01-01T00:00:10 | 2 | idc1 | host2 | idc1 | +| 1970-01-01T00:00:10 | 4 | idc2 | host2 | idc2 | +| 1970-01-01T00:00:10 | 6 | idc3 | host2 | idc3 | +| 1970-01-01T00:00:15 | 2 | idc1 | host2 | idc1 | +| 1970-01-01T00:00:15 | 4 | idc2 | host2 | idc2 | +| 1970-01-01T00:00:15 | 6 | idc3 | host2 | idc3 | +| 1970-01-01T00:00:15 | 8 | idc4 | host2 | idc4 | ++---------------------+-----+---------+-------+------+ + +-- dst_label is equal to source label -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "idc", "$2", "idc", "(.*):(.*)"); + ++---------------------+-----+------+-------+ +| ts | val | idc | host | ++---------------------+-----+------+-------+ +| 1970-01-01T00:00:00 | 2 | idc1 | host2 | +| 1970-01-01T00:00:05 | 2 | idc1 | host2 | +| 1970-01-01T00:00:05 | 4 | idc2 | host2 | +| 1970-01-01T00:00:10 | 2 | idc1 | host2 | +| 1970-01-01T00:00:10 | 4 | idc2 | host2 | +| 1970-01-01T00:00:10 | 6 | idc3 | host2 | +| 1970-01-01T00:00:15 | 2 | idc1 | host2 | +| 1970-01-01T00:00:15 | 4 | idc2 | host2 | +| 1970-01-01T00:00:15 | 6 | idc3 | host2 | +| 1970-01-01T00:00:15 | 8 | idc4 | host2 | ++---------------------+-----+------+-------+ + +-- test the empty source label -- +-- TODO(dennis): we can't remove the label currently -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "idc", "", "", ""); + 
++---------------------+-----+-----+-------+ +| ts | val | idc | host | ++---------------------+-----+-----+-------+ +| 1970-01-01T00:00:00 | 2 | | host2 | +| 1970-01-01T00:00:05 | 2 | | host2 | +| 1970-01-01T00:00:05 | 4 | | host2 | +| 1970-01-01T00:00:10 | 2 | | host2 | +| 1970-01-01T00:00:10 | 4 | | host2 | +| 1970-01-01T00:00:10 | 6 | | host2 | +| 1970-01-01T00:00:15 | 2 | | host2 | +| 1970-01-01T00:00:15 | 4 | | host2 | +| 1970-01-01T00:00:15 | 6 | | host2 | +| 1970-01-01T00:00:15 | 8 | | host2 | ++---------------------+-----+-----+-------+ + +DROP TABLE test; + +Affected Rows: 0 + diff --git a/tests/cases/standalone/common/promql/label.sql b/tests/cases/standalone/common/promql/label.sql new file mode 100644 index 000000000000..3b9058c27ed6 --- /dev/null +++ b/tests/cases/standalone/common/promql/label.sql @@ -0,0 +1,55 @@ +CREATE TABLE test ( + ts timestamp(3) time index, + host STRING, + idc STRING, + val BIGINT, + PRIMARY KEY(host, idc), +); + +INSERT INTO TABLE test VALUES + (0, 'host1', 'idc1', 1), + (0, 'host2', 'idc1', 2), + (5000, 'host1', 'idc2:zone1',3), + (5000, 'host2', 'idc2',4), + (10000, 'host1', 'idc3:zone2',5), + (10000, 'host2', 'idc3',6), + (15000, 'host1', 'idc4:zone3',7), + (15000, 'host2', 'idc4',8); + +-- Missing source labels -- +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "new_host", "-"); + +-- dst_label is equal to source label -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "host", "-", "host"); + +-- dst_label is in source labels -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "host", "-", "idc", "host"); + +-- test the empty source label -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "host", "-", ""); + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_join(test{host="host1"}, "new_host", "-", "idc", "host"); + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host1"}, "new_idc", "$2", "idc", "(.*):(.*)"); + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host1"}, "new_idc", "idc99", "idc", "idc2.*"); + +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "new_idc", "$2", "idc", "(.*):(.*)"); + +-- dst_label is equal to source label -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "idc", "$2", "idc", "(.*):(.*)"); + +-- test the empty source label -- +-- TODO(dennis): we can't remove the label currently -- +-- SQLNESS SORT_RESULT 3 1 +TQL EVAL (0, 15, '5s') label_replace(test{host="host2"}, "idc", "", "", ""); + +DROP TABLE test; From 58d6982c939b0b85e932ce9f1e4b879a4d2f288f Mon Sep 17 00:00:00 2001 From: Yingwen Date: Wed, 18 Dec 2024 14:37:22 +0800 Subject: [PATCH 42/59] feat: do not keep MemtableRefs in ScanInput (#5184) --- src/mito2/src/memtable.rs | 11 +++++- src/mito2/src/memtable/bulk.rs | 5 +-- src/mito2/src/memtable/partition_tree.rs | 10 +++-- src/mito2/src/memtable/time_series.rs | 9 +++-- src/mito2/src/read/range.rs | 50 +++++++++--------------- src/mito2/src/read/scan_region.rs | 30 +++++++++----- src/mito2/src/read/scan_util.rs | 3 +- src/mito2/src/read/seq_scan.rs | 1 - src/mito2/src/read/unordered_scan.rs | 1 - src/mito2/src/test_util/memtable_util.rs | 6 +-- 10 files changed, 66 insertions(+), 60 deletions(-) diff --git a/src/mito2/src/memtable.rs b/src/mito2/src/memtable.rs index f7d05c621f62..6adc6eb96aec 100644 --- a/src/mito2/src/memtable.rs +++ b/src/mito2/src/memtable.rs @@ 
-110,6 +110,15 @@ impl MemtableStats { pub type BoxedBatchIterator = Box> + Send>; +/// Ranges in a memtable. +#[derive(Default)] +pub struct MemtableRanges { + /// Range IDs and ranges. + pub ranges: BTreeMap, + /// Statistics of the memtable at the query time. + pub stats: MemtableStats, +} + /// In memory write buffer. pub trait Memtable: Send + Sync + fmt::Debug { /// Returns the id of this memtable. @@ -139,7 +148,7 @@ pub trait Memtable: Send + Sync + fmt::Debug { &self, projection: Option<&[ColumnId]>, predicate: Option, - ) -> BTreeMap; + ) -> MemtableRanges; /// Returns true if the memtable is empty. fn is_empty(&self) -> bool; diff --git a/src/mito2/src/memtable/bulk.rs b/src/mito2/src/memtable/bulk.rs index 46e757f3df16..96e6c70acdf9 100644 --- a/src/mito2/src/memtable/bulk.rs +++ b/src/mito2/src/memtable/bulk.rs @@ -14,7 +14,6 @@ //! Memtable implementation for bulk load -use std::collections::BTreeMap; use std::sync::{Arc, RwLock}; use store_api::metadata::RegionMetadataRef; @@ -25,7 +24,7 @@ use crate::error::Result; use crate::memtable::bulk::part::BulkPart; use crate::memtable::key_values::KeyValue; use crate::memtable::{ - BoxedBatchIterator, KeyValues, Memtable, MemtableId, MemtableRange, MemtableRef, MemtableStats, + BoxedBatchIterator, KeyValues, Memtable, MemtableId, MemtableRanges, MemtableRef, MemtableStats, }; #[allow(unused)] @@ -68,7 +67,7 @@ impl Memtable for BulkMemtable { &self, _projection: Option<&[ColumnId]>, _predicate: Option, - ) -> BTreeMap { + ) -> MemtableRanges { todo!() } diff --git a/src/mito2/src/memtable/partition_tree.rs b/src/mito2/src/memtable/partition_tree.rs index 4c4b471643bd..1376f923316c 100644 --- a/src/mito2/src/memtable/partition_tree.rs +++ b/src/mito2/src/memtable/partition_tree.rs @@ -23,7 +23,6 @@ mod shard; mod shard_builder; mod tree; -use std::collections::BTreeMap; use std::fmt; use std::sync::atomic::{AtomicI64, AtomicUsize, Ordering}; use std::sync::Arc; @@ -41,7 +40,7 @@ use crate::memtable::partition_tree::tree::PartitionTree; use crate::memtable::stats::WriteMetrics; use crate::memtable::{ AllocTracker, BoxedBatchIterator, BulkPart, IterBuilder, KeyValues, Memtable, MemtableBuilder, - MemtableId, MemtableRange, MemtableRangeContext, MemtableRef, MemtableStats, + MemtableId, MemtableRange, MemtableRangeContext, MemtableRanges, MemtableRef, MemtableStats, }; use crate::region::options::MergeMode; @@ -176,7 +175,7 @@ impl Memtable for PartitionTreeMemtable { &self, projection: Option<&[ColumnId]>, predicate: Option, - ) -> BTreeMap { + ) -> MemtableRanges { let projection = projection.map(|ids| ids.to_vec()); let builder = Box::new(PartitionTreeIterBuilder { tree: self.tree.clone(), @@ -185,7 +184,10 @@ impl Memtable for PartitionTreeMemtable { }); let context = Arc::new(MemtableRangeContext::new(self.id, builder)); - [(0, MemtableRange::new(context))].into() + MemtableRanges { + ranges: [(0, MemtableRange::new(context))].into(), + stats: self.stats(), + } } fn is_empty(&self) -> bool { diff --git a/src/mito2/src/memtable/time_series.rs b/src/mito2/src/memtable/time_series.rs index 4959c468b6db..8ef6f4412120 100644 --- a/src/mito2/src/memtable/time_series.rs +++ b/src/mito2/src/memtable/time_series.rs @@ -45,7 +45,7 @@ use crate::memtable::key_values::KeyValue; use crate::memtable::stats::WriteMetrics; use crate::memtable::{ AllocTracker, BoxedBatchIterator, BulkPart, IterBuilder, KeyValues, Memtable, MemtableBuilder, - MemtableId, MemtableRange, MemtableRangeContext, MemtableRef, MemtableStats, + MemtableId, MemtableRange, 
MemtableRangeContext, MemtableRanges, MemtableRef, MemtableStats, }; use crate::metrics::{READ_ROWS_TOTAL, READ_STAGE_ELAPSED}; use crate::read::dedup::LastNonNullIter; @@ -250,7 +250,7 @@ impl Memtable for TimeSeriesMemtable { &self, projection: Option<&[ColumnId]>, predicate: Option, - ) -> BTreeMap { + ) -> MemtableRanges { let projection = if let Some(projection) = projection { projection.iter().copied().collect() } else { @@ -268,7 +268,10 @@ impl Memtable for TimeSeriesMemtable { }); let context = Arc::new(MemtableRangeContext::new(self.id, builder)); - [(0, MemtableRange::new(context))].into() + MemtableRanges { + ranges: [(0, MemtableRange::new(context))].into(), + stats: self.stats(), + } } fn is_empty(&self) -> bool { diff --git a/src/mito2/src/read/range.rs b/src/mito2/src/read/range.rs index bdad5f8fef0c..1b29e196a2fe 100644 --- a/src/mito2/src/read/range.rs +++ b/src/mito2/src/read/range.rs @@ -24,7 +24,7 @@ use store_api::region_engine::PartitionRange; use crate::cache::CacheManager; use crate::error::Result; -use crate::memtable::{MemtableRange, MemtableRef}; +use crate::memtable::{MemtableRange, MemtableRanges, MemtableStats}; use crate::read::scan_region::ScanInput; use crate::sst::file::{overlaps, FileHandle, FileTimeRange}; use crate::sst::parquet::file_range::{FileRange, FileRangeContextRef}; @@ -175,7 +175,7 @@ impl RangeMeta { } } - fn push_unordered_mem_ranges(memtables: &[MemtableRef], ranges: &mut Vec) { + fn push_unordered_mem_ranges(memtables: &[MemRangeBuilder], ranges: &mut Vec) { // For append mode, we can parallelize reading memtables. for (memtable_index, memtable) in memtables.iter().enumerate() { let stats = memtable.stats(); @@ -270,7 +270,7 @@ impl RangeMeta { } } - fn push_seq_mem_ranges(memtables: &[MemtableRef], ranges: &mut Vec) { + fn push_seq_mem_ranges(memtables: &[MemRangeBuilder], ranges: &mut Vec) { // For non append-only mode, each range only contains one memtable by default. for (i, memtable) in memtables.iter().enumerate() { let stats = memtable.stats(); @@ -421,29 +421,38 @@ impl FileRangeBuilder { /// Builder to create mem ranges. pub(crate) struct MemRangeBuilder { /// Ranges of a memtable. - row_groups: BTreeMap, + ranges: MemtableRanges, } impl MemRangeBuilder { /// Builds a mem range builder from row groups. - pub(crate) fn new(row_groups: BTreeMap) -> Self { - Self { row_groups } + pub(crate) fn new(ranges: MemtableRanges) -> Self { + Self { ranges } } /// Builds mem ranges to read in the memtable. /// Negative `row_group_index` indicates all row groups. - fn build_ranges(&self, row_group_index: i64, ranges: &mut SmallVec<[MemtableRange; 2]>) { + pub(crate) fn build_ranges( + &self, + row_group_index: i64, + ranges: &mut SmallVec<[MemtableRange; 2]>, + ) { if row_group_index >= 0 { let row_group_index = row_group_index as usize; // Scans one row group. - let Some(range) = self.row_groups.get(&row_group_index) else { + let Some(range) = self.ranges.ranges.get(&row_group_index) else { return; }; ranges.push(range.clone()); } else { - ranges.extend(self.row_groups.values().cloned()); + ranges.extend(self.ranges.ranges.values().cloned()); } } + + /// Returns the statistics of the memtable. + pub(crate) fn stats(&self) -> &MemtableStats { + &self.ranges.stats + } } /// List to manages the builders to create file ranges. @@ -451,18 +460,15 @@ impl MemRangeBuilder { /// the list to different streams in the same partition. 
pub(crate) struct RangeBuilderList { num_memtables: usize, - mem_builders: Mutex>>, file_builders: Mutex>>>, } impl RangeBuilderList { /// Creates a new [ReaderBuilderList] with the given number of memtables and files. pub(crate) fn new(num_memtables: usize, num_files: usize) -> Self { - let mem_builders = (0..num_memtables).map(|_| None).collect(); let file_builders = (0..num_files).map(|_| None).collect(); Self { num_memtables, - mem_builders: Mutex::new(mem_builders), file_builders: Mutex::new(file_builders), } } @@ -488,26 +494,6 @@ impl RangeBuilderList { Ok(ranges) } - /// Builds mem ranges to read the row group at `index`. - pub(crate) fn build_mem_ranges( - &self, - input: &ScanInput, - index: RowGroupIndex, - ) -> SmallVec<[MemtableRange; 2]> { - let mut ranges = SmallVec::new(); - let mut mem_builders = self.mem_builders.lock().unwrap(); - match &mut mem_builders[index.index] { - Some(builder) => builder.build_ranges(index.row_group_index, &mut ranges), - None => { - let builder = input.prune_memtable(index.index); - builder.build_ranges(index.row_group_index, &mut ranges); - mem_builders[index.index] = Some(builder); - } - } - - ranges - } - fn get_file_builder(&self, index: usize) -> Option> { let file_builders = self.file_builders.lock().unwrap(); file_builders[index].clone() diff --git a/src/mito2/src/read/scan_region.rs b/src/mito2/src/read/scan_region.rs index 32b8c90cda02..946ef2884132 100644 --- a/src/mito2/src/read/scan_region.rs +++ b/src/mito2/src/read/scan_region.rs @@ -24,6 +24,7 @@ use common_recordbatch::SendableRecordBatchStream; use common_telemetry::{debug, error, tracing, warn}; use common_time::range::TimestampRange; use datafusion_expr::utils::expr_to_columns; +use smallvec::SmallVec; use store_api::region_engine::{PartitionRange, RegionScannerRef}; use store_api::storage::{ScanRequest, TimeSeriesRowSelector}; use table::predicate::{build_time_range_predicate, Predicate}; @@ -35,7 +36,7 @@ use crate::cache::file_cache::FileCacheRef; use crate::cache::CacheManagerRef; use crate::config::DEFAULT_SCAN_CHANNEL_SIZE; use crate::error::Result; -use crate::memtable::MemtableRef; +use crate::memtable::MemtableRange; use crate::metrics::READ_SST_COUNT; use crate::read::compat::{self, CompatBatch}; use crate::read::projection::ProjectionMapper; @@ -328,6 +329,14 @@ impl ScanRegion { Some(p) => ProjectionMapper::new(&self.version.metadata, p.iter().copied())?, None => ProjectionMapper::all(&self.version.metadata)?, }; + // Get memtable ranges to scan. + let memtables = memtables + .into_iter() + .map(|mem| { + let ranges = mem.ranges(Some(mapper.column_ids()), Some(predicate.clone())); + MemRangeBuilder::new(ranges) + }) + .collect(); let input = ScanInput::new(self.access_layer, mapper) .with_time_range(Some(time_range)) @@ -484,8 +493,8 @@ pub(crate) struct ScanInput { time_range: Option, /// Predicate to push down. pub(crate) predicate: Option, - /// Memtables to scan. - pub(crate) memtables: Vec, + /// Memtable range builders for memtables in the time range.. + pub(crate) memtables: Vec, /// Handles to SST files to scan. pub(crate) files: Vec, /// Cache. @@ -547,9 +556,9 @@ impl ScanInput { self } - /// Sets memtables to read. + /// Sets memtable range builders. #[must_use] - pub(crate) fn with_memtables(mut self, memtables: Vec) -> Self { + pub(crate) fn with_memtables(mut self, memtables: Vec) -> Self { self.memtables = memtables; self } @@ -667,11 +676,12 @@ impl ScanInput { Ok(sources) } - /// Prunes a memtable to scan and returns the builder to build readers. 
- pub(crate) fn prune_memtable(&self, mem_index: usize) -> MemRangeBuilder { - let memtable = &self.memtables[mem_index]; - let row_groups = memtable.ranges(Some(self.mapper.column_ids()), self.predicate.clone()); - MemRangeBuilder::new(row_groups) + /// Builds memtable ranges to scan by `index`. + pub(crate) fn build_mem_ranges(&self, index: RowGroupIndex) -> SmallVec<[MemtableRange; 2]> { + let memtable = &self.memtables[index.index]; + let mut ranges = SmallVec::new(); + memtable.build_ranges(index.row_group_index, &mut ranges); + ranges } /// Prunes a file to scan and returns the builder to build readers. diff --git a/src/mito2/src/read/scan_util.rs b/src/mito2/src/read/scan_util.rs index 0bdf62e77e03..77a9bb161254 100644 --- a/src/mito2/src/read/scan_util.rs +++ b/src/mito2/src/read/scan_util.rs @@ -137,10 +137,9 @@ pub(crate) fn scan_mem_ranges( part_metrics: PartitionMetrics, index: RowGroupIndex, time_range: FileTimeRange, - range_builder_list: Arc, ) -> impl Stream> { try_stream! { - let ranges = range_builder_list.build_mem_ranges(&stream_ctx.input, index); + let ranges = stream_ctx.input.build_mem_ranges(index); part_metrics.inc_num_mem_ranges(ranges.len()); for range in ranges { let build_reader_start = Instant::now(); diff --git a/src/mito2/src/read/seq_scan.rs b/src/mito2/src/read/seq_scan.rs index bdf3a7d6b8bb..ca9291c0f6ed 100644 --- a/src/mito2/src/read/seq_scan.rs +++ b/src/mito2/src/read/seq_scan.rs @@ -403,7 +403,6 @@ fn build_sources( part_metrics.clone(), *index, range_meta.time_range, - range_builder_list.clone(), ); Box::pin(stream) as _ } else { diff --git a/src/mito2/src/read/unordered_scan.rs b/src/mito2/src/read/unordered_scan.rs index 60e5ca5c7cdb..28e7d64addd8 100644 --- a/src/mito2/src/read/unordered_scan.rs +++ b/src/mito2/src/read/unordered_scan.rs @@ -97,7 +97,6 @@ impl UnorderedScan { part_metrics.clone(), *index, range_meta.time_range, - range_builder_list.clone(), ); for await batch in stream { yield batch; diff --git a/src/mito2/src/test_util/memtable_util.rs b/src/mito2/src/test_util/memtable_util.rs index f1cc57aa3b51..1a0eacecf823 100644 --- a/src/mito2/src/test_util/memtable_util.rs +++ b/src/mito2/src/test_util/memtable_util.rs @@ -35,7 +35,7 @@ use crate::memtable::key_values::KeyValue; use crate::memtable::partition_tree::data::{timestamp_array_to_i64_slice, DataBatch, DataBuffer}; use crate::memtable::{ BoxedBatchIterator, BulkPart, KeyValues, Memtable, MemtableBuilder, MemtableId, MemtableRange, - MemtableRef, MemtableStats, + MemtableRanges, MemtableRef, MemtableStats, }; use crate::row_converter::{McmpRowCodec, RowCodec, SortField}; @@ -93,8 +93,8 @@ impl Memtable for EmptyMemtable { &self, _projection: Option<&[ColumnId]>, _predicate: Option, - ) -> BTreeMap { - BTreeMap::new() + ) -> MemtableRanges { + MemtableRanges::default() } fn is_empty(&self) -> bool { From c6b7caa2ec246a59260e900710fde289e86f1021 Mon Sep 17 00:00:00 2001 From: Yingwen Date: Wed, 18 Dec 2024 14:39:49 +0800 Subject: [PATCH 43/59] feat: do not remove time filters in ScanRegion (#5180) * feat: do not remove time filters * chore: remove `time_range` from parquet reader * chore: print more message in the check script * chore: fix unused error --- scripts/check-snafu.py | 6 +- src/mito2/src/error.rs | 8 -- src/mito2/src/read/scan_region.rs | 11 +-- src/mito2/src/sst/parquet/reader.rs | 90 +------------------ src/query/src/tests/time_range_filter_test.rs | 4 +- src/table/src/predicate.rs | 16 ++-- 6 files changed, 17 insertions(+), 118 deletions(-) diff --git 
a/scripts/check-snafu.py b/scripts/check-snafu.py index d44edfeb8c45..b91950692bd8 100644 --- a/scripts/check-snafu.py +++ b/scripts/check-snafu.py @@ -58,8 +58,10 @@ def main(): if not check_snafu_in_files(branch_name, other_rust_files) ] - for name in unused_snafu: - print(name) + if unused_snafu: + print("Unused error variants:") + for name in unused_snafu: + print(name) if unused_snafu: raise SystemExit(1) diff --git a/src/mito2/src/error.rs b/src/mito2/src/error.rs index f6d1dbafeec9..82b86a21554c 100644 --- a/src/mito2/src/error.rs +++ b/src/mito2/src/error.rs @@ -756,13 +756,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Failed to build time range filters for value: {:?}", timestamp))] - BuildTimeRangeFilter { - timestamp: Timestamp, - #[snafu(implicit)] - location: Location, - }, - #[snafu(display("Failed to open region"))] OpenRegion { #[snafu(implicit)] @@ -1023,7 +1016,6 @@ impl ErrorExt for Error { ChecksumMismatch { .. } => StatusCode::Unexpected, RegionStopped { .. } => StatusCode::RegionNotReady, TimeRangePredicateOverflow { .. } => StatusCode::InvalidArguments, - BuildTimeRangeFilter { .. } => StatusCode::Unexpected, UnsupportedOperation { .. } => StatusCode::Unsupported, RemoteCompaction { .. } => StatusCode::Unexpected, diff --git a/src/mito2/src/read/scan_region.rs b/src/mito2/src/read/scan_region.rs index 946ef2884132..091b9bc48c14 100644 --- a/src/mito2/src/read/scan_region.rs +++ b/src/mito2/src/read/scan_region.rs @@ -355,8 +355,8 @@ impl ScanRegion { Ok(input) } - /// Build time range predicate from filters, also remove time filters from request. - fn build_time_range_predicate(&mut self) -> TimestampRange { + /// Build time range predicate from filters. + fn build_time_range_predicate(&self) -> TimestampRange { let time_index = self.version.metadata.time_index_column(); let unit = time_index .column_schema @@ -364,11 +364,7 @@ impl ScanRegion { .as_timestamp() .expect("Time index must have timestamp-compatible type") .unit(); - build_time_range_predicate( - &time_index.column_schema.name, - unit, - &mut self.request.filters, - ) + build_time_range_predicate(&time_index.column_schema.name, unit, &self.request.filters) } /// Remove field filters if the merge mode is [MergeMode::LastNonNull]. 
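// Illustrative aside, not part of the patch: after this change the time filters are no
// longer drained from `request.filters`, so a single expression such as `ts >= 1000`
// both narrows the scan's TimestampRange here and remains available for SST/row-group
// pruning. A minimal sketch of the new borrowed-slice call; the imports mirror items
// touched elsewhere in this patch and the literal timestamp is a made-up example.
use common_time::timestamp::TimeUnit;
use datafusion_common::ScalarValue;
use datafusion_expr::{col, lit, Expr};
use table::predicate::build_time_range_predicate;

fn time_filter_sketch() {
    let filters: Vec<Expr> =
        vec![col("ts").gt_eq(lit(ScalarValue::TimestampMillisecond(Some(1_000), None)))];
    // Derive the scan time range without consuming the filters...
    let range = build_time_range_predicate("ts", TimeUnit::Millisecond, &filters);
    // ...and the untouched filters can still be pushed down to the parquet reader.
    assert_eq!(filters.len(), 1);
    let _ = range;
}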
@@ -695,7 +691,6 @@ impl ScanInput { .access_layer .read_sst(file.clone()) .predicate(self.predicate.clone()) - .time_range(self.time_range) .projection(Some(self.mapper.column_ids().to_vec())) .cache(self.cache_manager.clone()) .inverted_index_applier(self.inverted_index_applier.clone()) diff --git a/src/mito2/src/sst/parquet/reader.rs b/src/mito2/src/sst/parquet/reader.rs index 335b09426eca..39153fce8d96 100644 --- a/src/mito2/src/sst/parquet/reader.rs +++ b/src/mito2/src/sst/parquet/reader.rs @@ -23,11 +23,7 @@ use api::v1::SemanticType; use async_trait::async_trait; use common_recordbatch::filter::SimpleFilterEvaluator; use common_telemetry::{debug, warn}; -use common_time::range::TimestampRange; -use common_time::timestamp::TimeUnit; -use common_time::Timestamp; -use datafusion_common::ScalarValue; -use datafusion_expr::{Expr, Operator}; +use datafusion_expr::Expr; use datatypes::arrow::record_batch::RecordBatch; use datatypes::data_type::ConcreteDataType; use itertools::Itertools; @@ -42,7 +38,6 @@ use store_api::storage::ColumnId; use table::predicate::Predicate; use crate::cache::CacheManagerRef; -use crate::error; use crate::error::{ ArrowReaderSnafu, InvalidMetadataSnafu, InvalidParquetSnafu, ReadParquetSnafu, Result, }; @@ -74,8 +69,6 @@ pub struct ParquetReaderBuilder { object_store: ObjectStore, /// Predicate to push down. predicate: Option, - /// Time range to filter. - time_range: Option, /// Metadata of columns to read. /// /// `None` reads all columns. Due to schema change, the projection @@ -104,7 +97,6 @@ impl ParquetReaderBuilder { file_handle, object_store, predicate: None, - time_range: None, projection: None, cache_manager: None, inverted_index_applier: None, @@ -120,13 +112,6 @@ impl ParquetReaderBuilder { self } - /// Attaches the time range to the builder. - #[must_use] - pub fn time_range(mut self, time_range: Option) -> ParquetReaderBuilder { - self.time_range = time_range; - self - } - /// Attaches the projection to the builder. /// /// The reader only applies the projection to fields. @@ -238,7 +223,7 @@ impl ParquetReaderBuilder { cache_manager: self.cache_manager.clone(), }; - let mut filters = if let Some(predicate) = &self.predicate { + let filters = if let Some(predicate) = &self.predicate { predicate .exprs() .iter() @@ -254,10 +239,6 @@ impl ParquetReaderBuilder { vec![] }; - if let Some(time_range) = &self.time_range { - filters.extend(time_range_to_predicate(*time_range, ®ion_meta)?); - } - let codec = McmpRowCodec::new( read_format .metadata() @@ -678,59 +659,6 @@ impl ParquetReaderBuilder { } } -/// Transforms time range into [SimpleFilterEvaluator]. 
-fn time_range_to_predicate( - time_range: TimestampRange, - metadata: &RegionMetadataRef, -) -> Result> { - let ts_col = metadata.time_index_column(); - let ts_col_id = ts_col.column_id; - - let ts_to_filter = |op: Operator, timestamp: &Timestamp| { - let value = match timestamp.unit() { - TimeUnit::Second => ScalarValue::TimestampSecond(Some(timestamp.value()), None), - TimeUnit::Millisecond => { - ScalarValue::TimestampMillisecond(Some(timestamp.value()), None) - } - TimeUnit::Microsecond => { - ScalarValue::TimestampMicrosecond(Some(timestamp.value()), None) - } - TimeUnit::Nanosecond => ScalarValue::TimestampNanosecond(Some(timestamp.value()), None), - }; - let evaluator = SimpleFilterEvaluator::new(ts_col.column_schema.name.clone(), value, op) - .context(error::BuildTimeRangeFilterSnafu { - timestamp: *timestamp, - })?; - Ok(SimpleFilterContext::new( - evaluator, - ts_col_id, - SemanticType::Timestamp, - ts_col.column_schema.data_type.clone(), - )) - }; - - let predicates = match (time_range.start(), time_range.end()) { - (Some(start), Some(end)) => { - vec![ - ts_to_filter(Operator::GtEq, start)?, - ts_to_filter(Operator::Lt, end)?, - ] - } - - (Some(start), None) => { - vec![ts_to_filter(Operator::GtEq, start)?] - } - - (None, Some(end)) => { - vec![ts_to_filter(Operator::Lt, end)?] - } - (None, None) => { - vec![] - } - }; - Ok(predicates) -} - /// Metrics of filtering rows groups and rows. #[derive(Debug, Default, Clone, Copy)] pub(crate) struct ReaderFilterMetrics { @@ -939,20 +867,6 @@ pub(crate) struct SimpleFilterContext { } impl SimpleFilterContext { - fn new( - filter: SimpleFilterEvaluator, - column_id: ColumnId, - semantic_type: SemanticType, - data_type: ConcreteDataType, - ) -> Self { - Self { - filter, - column_id, - semantic_type, - data_type, - } - } - /// Creates a context for the `expr`. /// /// Returns None if the column to filter doesn't exist in the SST metadata or the diff --git a/src/query/src/tests/time_range_filter_test.rs b/src/query/src/tests/time_range_filter_test.rs index edb404220943..e141c99fa562 100644 --- a/src/query/src/tests/time_range_filter_test.rs +++ b/src/query/src/tests/time_range_filter_test.rs @@ -115,9 +115,9 @@ struct TimeRangeTester { impl TimeRangeTester { async fn check(&self, sql: &str, expect: TimestampRange) { let _ = exec_selection(self.engine.clone(), sql).await; - let mut filters = self.take_filters(); + let filters = self.take_filters(); - let range = build_time_range_predicate("ts", TimeUnit::Millisecond, &mut filters); + let range = build_time_range_predicate("ts", TimeUnit::Millisecond, &filters); assert_eq!(expect, range); } diff --git a/src/table/src/predicate.rs b/src/table/src/predicate.rs index 267f60b10834..1fd5cdcbd362 100644 --- a/src/table/src/predicate.rs +++ b/src/table/src/predicate.rs @@ -135,21 +135,17 @@ impl Predicate { // since it requires query engine to convert sql to filters. /// `build_time_range_predicate` extracts time range from logical exprs to facilitate fast /// time range pruning. 
-pub fn build_time_range_predicate<'a>( - ts_col_name: &'a str, +pub fn build_time_range_predicate( + ts_col_name: &str, ts_col_unit: TimeUnit, - filters: &'a mut Vec, + filters: &[Expr], ) -> TimestampRange { let mut res = TimestampRange::min_to_max(); - let mut filters_remain = vec![]; - for expr in std::mem::take(filters) { - if let Some(range) = extract_time_range_from_expr(ts_col_name, ts_col_unit, &expr) { + for expr in filters { + if let Some(range) = extract_time_range_from_expr(ts_col_name, ts_col_unit, expr) { res = res.and(&range); - } else { - filters_remain.push(expr); } } - *filters = filters_remain; res } @@ -392,7 +388,7 @@ mod tests { fn check_build_predicate(expr: Expr, expect: TimestampRange) { assert_eq!( expect, - build_time_range_predicate("ts", TimeUnit::Millisecond, &mut vec![expr]) + build_time_range_predicate("ts", TimeUnit::Millisecond, &[expr]) ); } From 9b4e8555e256eedf1bf7f2b6c56b077a7ac4e405 Mon Sep 17 00:00:00 2001 From: jeremyhi Date: Wed, 18 Dec 2024 16:17:34 +0800 Subject: [PATCH 44/59] feat: extract hints from http header (#5128) * feat: extract hints from http header * Update src/servers/src/http/hints.rs Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com> * chore: by comment * refactor: get instead of loop --------- Co-authored-by: shuiyisong <113876041+shuiyisong@users.noreply.github.com> --- src/servers/src/grpc/database.rs | 57 +---------- src/servers/src/hint_headers.rs | 170 +++++++++++++++++++++++++++++++ src/servers/src/http.rs | 4 +- src/servers/src/http/hints.rs | 30 ++++++ src/servers/src/lib.rs | 1 + 5 files changed, 207 insertions(+), 55 deletions(-) create mode 100644 src/servers/src/hint_headers.rs create mode 100644 src/servers/src/http/hints.rs diff --git a/src/servers/src/grpc/database.rs b/src/servers/src/grpc/database.rs index 572f3c66f4d2..121d8c6c8594 100644 --- a/src/servers/src/grpc/database.rs +++ b/src/servers/src/grpc/database.rs @@ -20,13 +20,11 @@ use common_error::status_code::StatusCode; use common_query::OutputData; use common_telemetry::{debug, warn}; use futures::StreamExt; -use tonic::metadata::{KeyAndValueRef, MetadataMap}; use tonic::{Request, Response, Status, Streaming}; use crate::grpc::greptime_handler::GreptimeRequestHandler; use crate::grpc::{cancellation, TonicResult}; - -pub const GREPTIME_DB_HEADER_HINT_PREFIX: &str = "x-greptime-hint-"; +use crate::hint_headers; pub(crate) struct DatabaseService { handler: GreptimeRequestHandler, @@ -45,7 +43,7 @@ impl GreptimeDatabase for DatabaseService { request: Request, ) -> TonicResult> { let remote_addr = request.remote_addr(); - let hints = extract_hints(request.metadata()); + let hints = hint_headers::extract_hints(request.metadata()); debug!( "GreptimeDatabase::Handle: request from {:?} with hints: {:?}", remote_addr, hints @@ -91,7 +89,7 @@ impl GreptimeDatabase for DatabaseService { request: Request>, ) -> Result, Status> { let remote_addr = request.remote_addr(); - let hints = extract_hints(request.metadata()); + let hints = hint_headers::extract_hints(request.metadata()); debug!( "GreptimeDatabase::HandleRequests: request from {:?} with hints: {:?}", remote_addr, hints @@ -142,52 +140,3 @@ impl GreptimeDatabase for DatabaseService { cancellation::with_cancellation_handler(request_future, cancellation_future).await } } - -fn extract_hints(metadata: &MetadataMap) -> Vec<(String, String)> { - metadata - .iter() - .filter_map(|kv| { - let KeyAndValueRef::Ascii(key, value) = kv else { - return None; - }; - let key = key.as_str(); - let 
new_key = key.strip_prefix(GREPTIME_DB_HEADER_HINT_PREFIX)?; - let Ok(value) = value.to_str() else { - // Simply return None for non-string values. - return None; - }; - Some((new_key.to_string(), value.trim().to_string())) - }) - .collect() -} - -#[cfg(test)] -mod tests { - use tonic::metadata::MetadataValue; - - use super::*; - - #[test] - fn test_extract_hints() { - let mut metadata = MetadataMap::new(); - let prev = metadata.insert( - "x-greptime-hint-append_mode", - MetadataValue::from_static("true"), - ); - metadata.insert("test-key", MetadataValue::from_static("test-value")); - assert!(prev.is_none()); - let hints = extract_hints(&metadata); - assert_eq!(hints, vec![("append_mode".to_string(), "true".to_string())]); - } - - #[test] - fn extract_hints_ignores_non_ascii_metadata() { - let mut metadata = MetadataMap::new(); - metadata.insert_bin( - "x-greptime-hint-merge_mode-bin", - MetadataValue::from_bytes(b"last_non_null"), - ); - let hints = extract_hints(&metadata); - assert!(hints.is_empty()); - } -} diff --git a/src/servers/src/hint_headers.rs b/src/servers/src/hint_headers.rs new file mode 100644 index 000000000000..6dafd45196b3 --- /dev/null +++ b/src/servers/src/hint_headers.rs @@ -0,0 +1,170 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use http::HeaderMap; +use tonic::metadata::MetadataMap; + +pub const HINT_KEYS: [&str; 5] = [ + "x-greptime-hint-auto_create_table", + "x-greptime-hint-ttl", + "x-greptime-hint-append_mode", + "x-greptime-hint-merge_mode", + "x-greptime-hint-physical_table", +]; + +pub(crate) fn extract_hints(headers: &T) -> Vec<(String, String)> { + let mut hints = Vec::new(); + for key in HINT_KEYS.iter() { + if let Some(value) = headers.get(key) { + let new_key = key.replace("x-greptime-hint-", ""); + hints.push((new_key, value.trim().to_string())); + } + } + hints +} + +pub(crate) trait ToHeaderMap { + fn get(&self, key: &str) -> Option<&str>; +} + +impl ToHeaderMap for MetadataMap { + fn get(&self, key: &str) -> Option<&str> { + self.get(key).and_then(|v| v.to_str().ok()) + } +} + +impl ToHeaderMap for HeaderMap { + fn get(&self, key: &str) -> Option<&str> { + self.get(key).and_then(|v| v.to_str().ok()) + } +} +#[cfg(test)] +mod tests { + use http::header::{HeaderMap, HeaderValue}; + use tonic::metadata::{MetadataMap, MetadataValue}; + + use super::*; + + #[test] + fn test_extract_hints_with_full_header_map() { + let mut headers = HeaderMap::new(); + headers.insert( + "x-greptime-hint-auto_create_table", + HeaderValue::from_static("true"), + ); + headers.insert("x-greptime-hint-ttl", HeaderValue::from_static("3600d")); + headers.insert( + "x-greptime-hint-append_mode", + HeaderValue::from_static("true"), + ); + headers.insert( + "x-greptime-hint-merge_mode", + HeaderValue::from_static("false"), + ); + headers.insert( + "x-greptime-hint-physical_table", + HeaderValue::from_static("table1"), + ); + + let hints = extract_hints(&headers); + + assert_eq!(hints.len(), 5); + assert_eq!( + hints[0], + ("auto_create_table".to_string(), "true".to_string()) + ); + assert_eq!(hints[1], ("ttl".to_string(), "3600d".to_string())); + assert_eq!(hints[2], ("append_mode".to_string(), "true".to_string())); + assert_eq!(hints[3], ("merge_mode".to_string(), "false".to_string())); + assert_eq!( + hints[4], + ("physical_table".to_string(), "table1".to_string()) + ); + } + + #[test] + fn test_extract_hints_with_missing_keys() { + let mut headers = HeaderMap::new(); + headers.insert( + "x-greptime-hint-auto_create_table", + HeaderValue::from_static("true"), + ); + headers.insert("x-greptime-hint-ttl", HeaderValue::from_static("3600d")); + + let hints = extract_hints(&headers); + + assert_eq!(hints.len(), 2); + assert_eq!( + hints[0], + ("auto_create_table".to_string(), "true".to_string()) + ); + assert_eq!(hints[1], ("ttl".to_string(), "3600d".to_string())); + } + + #[test] + fn test_extract_hints_with_metadata_map() { + let mut metadata = MetadataMap::new(); + metadata.insert( + "x-greptime-hint-auto_create_table", + MetadataValue::from_static("true"), + ); + metadata.insert("x-greptime-hint-ttl", MetadataValue::from_static("3600d")); + metadata.insert( + "x-greptime-hint-append_mode", + MetadataValue::from_static("true"), + ); + metadata.insert( + "x-greptime-hint-merge_mode", + MetadataValue::from_static("false"), + ); + metadata.insert( + "x-greptime-hint-physical_table", + MetadataValue::from_static("table1"), + ); + + let hints = extract_hints(&metadata); + + assert_eq!(hints.len(), 5); + assert_eq!( + hints[0], + ("auto_create_table".to_string(), "true".to_string()) + ); + assert_eq!(hints[1], ("ttl".to_string(), "3600d".to_string())); + assert_eq!(hints[2], ("append_mode".to_string(), "true".to_string())); + assert_eq!(hints[3], ("merge_mode".to_string(), "false".to_string())); + assert_eq!( + hints[4], + 
("physical_table".to_string(), "table1".to_string()) + ); + } + + #[test] + fn test_extract_hints_with_partial_metadata_map() { + let mut metadata = MetadataMap::new(); + metadata.insert( + "x-greptime-hint-auto_create_table", + MetadataValue::from_static("true"), + ); + metadata.insert("x-greptime-hint-ttl", MetadataValue::from_static("3600d")); + + let hints = extract_hints(&metadata); + + assert_eq!(hints.len(), 2); + assert_eq!( + hints[0], + ("auto_create_table".to_string(), "true".to_string()) + ); + assert_eq!(hints[1], ("ttl".to_string(), "3600d".to_string())); + } +} diff --git a/src/servers/src/http.rs b/src/servers/src/http.rs index 1107870c9a25..9841f02d6ead 100644 --- a/src/servers/src/http.rs +++ b/src/servers/src/http.rs @@ -92,6 +92,7 @@ mod timeout; pub(crate) use timeout::DynamicTimeoutLayer; +mod hints; #[cfg(any(test, feature = "testing"))] pub mod test_helpers; @@ -703,7 +704,8 @@ impl HttpServer { .layer(middleware::from_fn_with_state( AuthState::new(self.user_provider.clone()), authorize::check_http_auth, - )), + )) + .layer(middleware::from_fn(hints::extract_hints)), ) // Handlers for debug, we don't expect a timeout. .nest( diff --git a/src/servers/src/http/hints.rs b/src/servers/src/http/hints.rs new file mode 100644 index 000000000000..4612201880eb --- /dev/null +++ b/src/servers/src/http/hints.rs @@ -0,0 +1,30 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use axum::http::Request; +use axum::middleware::Next; +use axum::response::Response; +use session::context::QueryContext; + +use crate::hint_headers; + +pub async fn extract_hints(mut request: Request, next: Next) -> Response { + let hints = hint_headers::extract_hints(request.headers()); + if let Some(query_ctx) = request.extensions_mut().get_mut::() { + for (key, value) in hints { + query_ctx.set_extension(key, value); + } + } + next.run(request).await +} diff --git a/src/servers/src/lib.rs b/src/servers/src/lib.rs index ce6857c6d23f..92f2b8b9d0ba 100644 --- a/src/servers/src/lib.rs +++ b/src/servers/src/lib.rs @@ -27,6 +27,7 @@ pub mod error; pub mod export_metrics; pub mod grpc; pub mod heartbeat_options; +mod hint_headers; pub mod http; pub mod influxdb; pub mod interceptor; From fa773cf48031e4da6df6997cb8aa572dad3f896b Mon Sep 17 00:00:00 2001 From: "Lei, HUANG" <6406592+v0y4g3r@users.noreply.github.com> Date: Wed, 18 Dec 2024 16:24:15 +0800 Subject: [PATCH 45/59] fix(sqlness): enforce order in union tests (#5190) Add ORDER BY clause to subquery union tests Updated the SQL and result files for subquery union tests to include an ORDER BY clause, ensuring consistent result ordering. This change aligns with the test case from the DuckDB repository. 
--- tests/cases/standalone/common/subquery/table.result | 2 +- tests/cases/standalone/common/subquery/table.sql | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/cases/standalone/common/subquery/table.result b/tests/cases/standalone/common/subquery/table.result index 8cea3aed13e2..549a38754916 100644 --- a/tests/cases/standalone/common/subquery/table.result +++ b/tests/cases/standalone/common/subquery/table.result @@ -63,7 +63,7 @@ Affected Rows: 0 -- subquery union, from: -- https://github.com/duckdb/duckdb/blob/9196dd9b0a163e6c8aada26218803d04be30c562/test/sql/subquery/table/test_subquery_union.test -SELECT * FROM (SELECT 42) UNION ALL SELECT * FROM (SELECT 43); +SELECT * FROM (SELECT 42) UNION ALL SELECT * FROM (SELECT 43) ORDER BY 1; +-----------+ | Int64(42) | diff --git a/tests/cases/standalone/common/subquery/table.sql b/tests/cases/standalone/common/subquery/table.sql index 9f53aef301f4..d49f3af739a6 100644 --- a/tests/cases/standalone/common/subquery/table.sql +++ b/tests/cases/standalone/common/subquery/table.sql @@ -24,7 +24,7 @@ DROP TABLE test; -- subquery union, from: -- https://github.com/duckdb/duckdb/blob/9196dd9b0a163e6c8aada26218803d04be30c562/test/sql/subquery/table/test_subquery_union.test -SELECT * FROM (SELECT 42) UNION ALL SELECT * FROM (SELECT 43); +SELECT * FROM (SELECT 42) UNION ALL SELECT * FROM (SELECT 43) ORDER BY 1; -- table subquery, from: -- https://github.com/duckdb/duckdb/blob/8704c7d0807d6ce1e2ebcdf6398e1b6cc050e507/test/sql/subquery/table/test_table_subquery.test From f04d3802598aa049fdd93900e180df2dececf7e4 Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Wed, 18 Dec 2024 16:51:46 +0800 Subject: [PATCH 46/59] fix: validate matcher op for __name__ in promql (#5191) Signed-off-by: Ruihang Xia --- src/query/src/promql/planner.rs | 7 +++++++ tests/cases/standalone/common/tql/basic.result | 4 ++++ tests/cases/standalone/common/tql/basic.sql | 2 ++ 3 files changed, 13 insertions(+) diff --git a/src/query/src/promql/planner.rs b/src/query/src/promql/planner.rs index 1e7bc27dab6a..bfdfb5981ae1 100644 --- a/src/query/src/promql/planner.rs +++ b/src/query/src/promql/planner.rs @@ -689,6 +689,13 @@ impl PromPlanner { let mut matches = label_matchers.find_matchers(METRIC_NAME); ensure!(!matches.is_empty(), NoMetricMatcherSnafu); ensure!(matches.len() == 1, MultipleMetricMatchersSnafu); + ensure!( + matches[0].op == MatchOp::Equal, + UnsupportedMatcherOpSnafu { + matcher_op: matches[0].op.to_string(), + matcher: METRIC_NAME + } + ); metric_name = matches.pop().map(|m| m.value); } diff --git a/tests/cases/standalone/common/tql/basic.result b/tests/cases/standalone/common/tql/basic.result index 5c6725dbcd07..3015101a5554 100644 --- a/tests/cases/standalone/common/tql/basic.result +++ b/tests/cases/standalone/common/tql/basic.result @@ -66,6 +66,10 @@ TQL EVAL (0, 10, '5s') {__name__!="test"}; Error: 2000(InvalidSyntax), vector selector must contain at least one non-empty matcher +TQL EVAL (0, 10, '5s') {__name__=~"test"}; + +Error: 1004(InvalidArguments), Matcher operator =~ is not supported for __name__ + -- the point at 1ms will be shadowed by the point at 2ms TQL EVAL (0, 10, '5s') test{k="a"}; diff --git a/tests/cases/standalone/common/tql/basic.sql b/tests/cases/standalone/common/tql/basic.sql index 85f29481486c..afca586ed8a1 100644 --- a/tests/cases/standalone/common/tql/basic.sql +++ b/tests/cases/standalone/common/tql/basic.sql @@ -22,6 +22,8 @@ TQL EVAL (0, 10, '5s') {__name__="test", __field__="i"}; -- NOT SUPPORTED: `__name__` 
matcher without equal condition TQL EVAL (0, 10, '5s') {__name__!="test"}; +TQL EVAL (0, 10, '5s') {__name__=~"test"}; + -- the point at 1ms will be shadowed by the point at 2ms TQL EVAL (0, 10, '5s') test{k="a"}; From 218236cc5b2b444346431263d35629715df4b155 Mon Sep 17 00:00:00 2001 From: Yingwen Date: Wed, 18 Dec 2024 17:10:56 +0800 Subject: [PATCH 47/59] docs: fix grafana dashboard row (#5192) --- grafana/greptimedb.json | 2596 ++++++++++++++++++++------------------- 1 file changed, 1299 insertions(+), 1297 deletions(-) diff --git a/grafana/greptimedb.json b/grafana/greptimedb.json index c526373874fa..9657565c27fe 100644 --- a/grafana/greptimedb.json +++ b/grafana/greptimedb.json @@ -2707,754 +2707,755 @@ "y": 48 }, "id": 21, - "panels": [], - "title": "Storage Components", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 49 - }, - "id": 18, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ + "panels": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "rate(opendal_bytes_total_sum[$__rate_interval])", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{scheme}}-{{operation}}", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "OpenDAL traffic", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + 
"lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 14 + }, + "id": 18, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "s" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 49 - }, - "id": 2, - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "disableTextWrap": false, + "editorMode": "code", + "expr": "rate(opendal_bytes_total_sum[$__rate_interval])", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{scheme}}-{{operation}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "OpenDAL traffic", + "type": "timeseries" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "histogram_quantile(0.95, sum by(le, operation, schema) (rate(opendal_requests_duration_seconds_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "__auto", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "OpenDAL operation duration", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 56 - }, - "id": 43, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - 
"showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "greptime_object_store_lru_cache_bytes", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{instance}}-{{type}}", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Object store read cache size", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "percentunit" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 56 - }, - "id": 44, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance) / (sum(increase(greptime_object_store_lru_cache_miss[$__rate_interval])) by (instance) + sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{instance}}", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Object store read cache hit", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - 
"lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "s" }, - "thresholdsStyle": { - "mode": "off" - } + "overrides": [] }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null - }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 63 - }, - "id": 10, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 14 + }, + "id": 2, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "{{logstore}}-{{optype}}-p95", - "range": true, - "refId": "Log Store P95" + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "builder", + "expr": "histogram_quantile(0.95, sum by(le, operation, schema) (rate(opendal_requests_duration_seconds_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "__auto", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "OpenDAL operation duration", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "{{logstore}}-{{optype}}-p99", - "range": true, - "refId": "Log Store P99" - } - ], - "title": "Log Store op duration seconds", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": 
"linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 21 + }, + "id": 43, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 63 - }, - "id": 12, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "disableTextWrap": false, + "editorMode": "code", + "expr": "greptime_object_store_lru_cache_bytes", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Object store read cache size", + "type": "timeseries" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "req-size-p95", - "range": true, - "refId": "A", - "useBackend": false + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "percentunit" + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 21 + }, + "id": 44, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": 
"sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance) / (sum(increase(greptime_object_store_lru_cache_miss[$__rate_interval])) by (instance) + sum(increase(greptime_object_store_lru_cache_hit[$__rate_interval])) by (instance))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Object store read cache hit", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "req-size-p99", - "range": true, - "refId": "C", - "useBackend": false + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 28 + }, + "id": 10, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{logstore}}-{{optype}}-p95", + "range": true, + "refId": "Log Store P95" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le,logstore,optype) (rate(greptime_logstore_op_elapsed_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{logstore}}-{{optype}}-p99", + "range": true, + "refId": "Log Store P99" + } + ], + "title": "Log Store op duration seconds", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "editorMode": "code", - "expr": "rate(raft_engine_write_size_sum[$__rate_interval])", - "hide": false, - "instant": false, - "legendFormat": "throughput", - "range": true, - "refId": "B" - } - ], - "title": "WAL write size", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + 
"axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" + }, + "overrides": [] }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 28 + }, + "id": 12, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "req-size-p95", + "range": true, + "refId": "A", + "useBackend": false }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le) (rate(raft_engine_write_size_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "req-size-p99", + "range": true, + "refId": "C", + "useBackend": false }, - "thresholdsStyle": { - "mode": "off" - } - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 70 - }, - "id": 37, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "editorMode": "code", + "expr": "rate(raft_engine_write_size_sum[$__rate_interval])", + "hide": false, + "instant": false, + "legendFormat": "throughput", + "range": true, + "refId": "B" + } + ], + "title": "WAL write size", + "type": "timeseries" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, type, node) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))", - "hide": false, - "instant": false, - "legendFormat": "{{node}}-{{type}}-p99", - "range": true, - "refId": 
"Log Store P95" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 35 + }, + "id": 37, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, type, node) (rate(raft_engine_sync_log_duration_seconds_bucket[$__rate_interval])))", + "hide": false, + "instant": false, + "legendFormat": "{{node}}-{{type}}-p99", + "range": true, + "refId": "Log Store P95" + } + ], + "title": "WAL sync duration seconds", + "type": "timeseries" } ], - "title": "WAL sync duration seconds", - "type": "timeseries" + "title": "Storage Components", + "type": "row" }, { - "collapsed": false, + "collapsed": true, "gridPos": { "h": 1, "w": 24, @@ -3462,681 +3463,682 @@ "y": 49 }, "id": 46, - "panels": [], - "title": "Index", - "type": "row" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - 
"mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 15 + }, + "id": 45, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 50 - }, - "id": 45, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "disableTextWrap": false, + "editorMode": "code", + "expr": "greptime_index_create_memory_usage", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}", + "range": true, + "refId": "A", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "editorMode": "code", + "expr": "greptime_index_apply_memory_usage", + "hide": false, + "instant": false, + "legendFormat": "{{instance}}", + "range": true, + "refId": "B" + } + ], + "title": "Index memory usage", + "type": "timeseries" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "greptime_index_create_memory_usage", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{instance}}-{{type}}", - "range": true, - "refId": "A", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" + }, + "overrides": [] }, - "editorMode": "code", - "expr": "greptime_index_apply_memory_usage", - "hide": false, - "instant": false, - "legendFormat": "{{instance}}", - "range": true, - "refId": "B" - } - ], - "title": "Index memory usage", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 15 }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - 
"hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "id": 19, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 50 - }, - "id": 19, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, type) (rate(greptime_index_apply_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "apply-{{type}}-p95", + "range": true, + "refId": "Apply P95", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, type) (rate(greptime_index_apply_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "apply-{{type}}-p95", + "range": true, + "refId": "Apply P99", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by(le, type) (rate(greptime_index_create_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "create-{{type}}-p95", + "range": true, + "refId": "Create P95", + "useBackend": false + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "histogram_quantile(0.99, sum by(le, type) (rate(greptime_index_create_elapsed_bucket[$__rate_interval])))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "create-{{type}}-p95", + "range": true, + "refId": "Create P99", + "useBackend": false + } + ], + "title": "Index elapsed", + "type": "timeseries" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, type) (rate(greptime_index_apply_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "apply-{{type}}-p95", - "range": true, - "refId": "Apply P95", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + 
"axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, type) (rate(greptime_index_apply_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "apply-{{type}}-p95", - "range": true, - "refId": "Apply P99", - "useBackend": false - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 22 }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.95, sum by(le, type) (rate(greptime_index_create_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "create-{{type}}-p95", - "range": true, - "refId": "Create P95", - "useBackend": false + "id": 47, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" + }, + "disableTextWrap": false, + "editorMode": "code", + "expr": "rate(greptime_index_create_rows_total[$__rate_interval])", + "fullMetaSearch": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{type}}", + "range": true, + "refId": "A", + "useBackend": false + } + ], + "title": "Index create rows total", + "type": "timeseries" }, { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "histogram_quantile(0.99, sum by(le, type) (rate(greptime_index_create_elapsed_bucket[$__rate_interval])))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "create-{{type}}-p95", - "range": true, - "refId": "Create P99", - "useBackend": false - } - ], - "title": "Index elapsed", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - }, - "thresholdsStyle": { - "mode": "off" - 
} - }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] }, - { - "color": "red", - "value": 80 - } - ] - } - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 57 - }, - "id": 47, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" + "unit": "bytes" + }, + "overrides": [] }, - "disableTextWrap": false, - "editorMode": "code", - "expr": "rate(greptime_index_create_rows_total[$__rate_interval])", - "fullMetaSearch": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{type}}", - "range": true, - "refId": "A", - "useBackend": false - } - ], - "title": "Index create rows total", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 22 }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "id": 48, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 57 - }, - "id": 48, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(instance, type) (rate(greptime_index_create_bytes_total[$__rate_interval]))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + 
"instant": false, + "legendFormat": "{{instance}}-{{type}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Index create bytes", + "type": "timeseries" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "sum by(instance, type) (rate(greptime_index_create_bytes_total[$__rate_interval]))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{instance}}-{{type}}", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "Index create bytes", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" - }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "bytes" }, - "thresholdsStyle": { - "mode": "off" + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 0, + "y": 29 + }, + "id": 49, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "bytes" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 0, - "y": 64 - }, - "id": 49, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true + "disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(instance, type, file_type) (rate(greptime_index_io_bytes_total[$__rate_interval]))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}-{{file_type}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Index IO 
bytes", + "type": "timeseries" }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS-1}" }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "sum by(instance, type, file_type) (rate(greptime_index_io_bytes_total[$__rate_interval]))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{instance}}-{{type}}-{{file_type}}", - "range": true, - "refId": "B", - "useBackend": false - } - ], - "title": "Index IO bytes", - "type": "timeseries" - }, - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "fieldConfig": { - "defaults": { - "color": { - "mode": "palette-classic" - }, - "custom": { - "axisBorderShow": false, - "axisCenteredZero": false, - "axisColorMode": "text", - "axisLabel": "", - "axisPlacement": "auto", - "barAlignment": 0, - "drawStyle": "line", - "fillOpacity": 0, - "gradientMode": "none", - "hideFrom": { - "legend": false, - "tooltip": false, - "viz": false - }, - "insertNulls": false, - "lineInterpolation": "linear", - "lineWidth": 1, - "pointSize": 5, - "scaleDistribution": { - "type": "linear" + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + }, + "unit": "none" }, - "showPoints": "auto", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" + "overrides": [] + }, + "gridPos": { + "h": 7, + "w": 12, + "x": 12, + "y": 29 + }, + "id": 50, + "interval": "1s", + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true }, - "thresholdsStyle": { - "mode": "off" + "tooltip": { + "mode": "single", + "sort": "none" } }, - "mappings": [], - "thresholds": { - "mode": "absolute", - "steps": [ - { - "color": "green", - "value": null + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS-1}" }, - { - "color": "red", - "value": 80 - } - ] - }, - "unit": "none" - }, - "overrides": [] - }, - "gridPos": { - "h": 7, - "w": 12, - "x": 12, - "y": 64 - }, - "id": 50, - "interval": "1s", - "options": { - "legend": { - "calcs": [], - "displayMode": "list", - "placement": "bottom", - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "datasource": { - "type": "prometheus", - "uid": "${DS_PROMETHEUS-1}" - }, - "disableTextWrap": false, - "editorMode": "builder", - "expr": "sum by(instance, type, file_type) (rate(greptime_index_io_op_total[$__rate_interval]))", - "fullMetaSearch": false, - "hide": false, - "includeNullMetadata": false, - "instant": false, - "legendFormat": "{{instance}}-{{type}}-{{file_type}}", - "range": true, - "refId": "B", - "useBackend": false + 
"disableTextWrap": false, + "editorMode": "builder", + "expr": "sum by(instance, type, file_type) (rate(greptime_index_io_op_total[$__rate_interval]))", + "fullMetaSearch": false, + "hide": false, + "includeNullMetadata": false, + "instant": false, + "legendFormat": "{{instance}}-{{type}}-{{file_type}}", + "range": true, + "refId": "B", + "useBackend": false + } + ], + "title": "Index IO op", + "type": "timeseries" } ], - "title": "Index IO op", - "type": "timeseries" + "title": "Index", + "type": "row" } ], "refresh": "10s", @@ -4153,6 +4155,6 @@ "timezone": "", "title": "GreptimeDB", "uid": "e7097237-669b-4f8d-b751-13067afbfb68", - "version": 17, + "version": 18, "weekStart": "" } From 548e1988ab4256fb6443e9e800f36603ec29a4fb Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Wed, 18 Dec 2024 19:24:43 +0800 Subject: [PATCH 48/59] refactor: remove unused symbols (#5193) chore: remove unused symbols Signed-off-by: Ruihang Xia --- src/cmd/src/datanode.rs | 4 -- src/cmd/src/flownode.rs | 4 -- .../meta/src/cache/table/table_route.rs | 8 --- src/common/meta/src/key/table_route.rs | 53 ------------------- src/common/meta/src/rpc/router.rs | 46 ---------------- src/common/recordbatch/src/lib.rs | 14 ----- src/common/time/src/util.rs | 4 -- src/flow/src/adapter.rs | 22 -------- src/flow/src/compute/render/src_sink.rs | 44 +-------------- src/flow/src/compute/types.rs | 16 ------ src/flow/src/expr/error.rs | 5 -- src/flow/src/expr/linear.rs | 28 ---------- src/flow/src/plan.rs | 46 +--------------- src/flow/src/repr/relation.rs | 8 --- src/meta-srv/src/metasrv.rs | 4 -- src/meta-srv/src/mocks.rs | 5 -- .../src/procedure/region_migration.rs | 6 --- src/script/src/python/ffi_types/copr.rs | 20 ------- src/session/src/lib.rs | 4 -- src/sql/src/statements.rs | 20 +------ 20 files changed, 4 insertions(+), 357 deletions(-) diff --git a/src/cmd/src/datanode.rs b/src/cmd/src/datanode.rs index 811ed826ad49..be2aedf57ec8 100644 --- a/src/cmd/src/datanode.rs +++ b/src/cmd/src/datanode.rs @@ -59,10 +59,6 @@ impl Instance { } } - pub fn datanode_mut(&mut self) -> &mut Datanode { - &mut self.datanode - } - pub fn datanode(&self) -> &Datanode { &self.datanode } diff --git a/src/cmd/src/flownode.rs b/src/cmd/src/flownode.rs index a9ad12bfbc02..b399bf37f70d 100644 --- a/src/cmd/src/flownode.rs +++ b/src/cmd/src/flownode.rs @@ -63,10 +63,6 @@ impl Instance { } } - pub fn flownode_mut(&mut self) -> &mut FlownodeInstance { - &mut self.flownode - } - pub fn flownode(&self) -> &FlownodeInstance { &self.flownode } diff --git a/src/common/meta/src/cache/table/table_route.rs b/src/common/meta/src/cache/table/table_route.rs index 2383a1ea13d0..840e52f8ae1c 100644 --- a/src/common/meta/src/cache/table/table_route.rs +++ b/src/common/meta/src/cache/table/table_route.rs @@ -49,14 +49,6 @@ impl TableRoute { TableRoute::Logical(_) => None, } } - - /// Returns [LogicalTableRouteValue] reference if it's [TableRoute::Logical]; Otherwise it returns [None]. - pub fn as_logical_table_route_ref(&self) -> Option<&Arc> { - match self { - TableRoute::Physical(_) => None, - TableRoute::Logical(table_route) => Some(table_route), - } - } } /// [TableRouteCache] caches the [TableId] to [TableRoute] mapping. 
diff --git a/src/common/meta/src/key/table_route.rs b/src/common/meta/src/key/table_route.rs index 96949d2b9fda..b5ebf0b4b1ec 100644 --- a/src/common/meta/src/key/table_route.rs +++ b/src/common/meta/src/key/table_route.rs @@ -290,28 +290,6 @@ impl TableRouteManager { } } - /// Returns the [`PhysicalTableRouteValue`] in the first level, - /// It won't follow the [`LogicalTableRouteValue`] to find the next level [`PhysicalTableRouteValue`]. - /// - /// Returns an error if the first level value is not a [`PhysicalTableRouteValue`]. - pub async fn try_get_physical_table_route( - &self, - table_id: TableId, - ) -> Result> { - match self.storage.get(table_id).await? { - Some(route) => { - ensure!( - route.is_physical(), - UnexpectedLogicalRouteTableSnafu { - err_msg: format!("{route:?} is a non-physical TableRouteValue.") - } - ); - Ok(Some(route.into_physical_table_route())) - } - None => Ok(None), - } - } - /// Returns the [TableId] recursively. /// /// Returns a [TableRouteNotFound](crate::error::Error::TableRouteNotFound) Error if: @@ -569,37 +547,6 @@ impl TableRouteStorage { .transpose() } - /// Returns the physical `DeserializedValueWithBytes` recursively. - /// - /// Returns a [TableRouteNotFound](crate::error::Error::TableRouteNotFound) Error if: - /// - the physical table(`logical_or_physical_table_id`) does not exist - /// - the corresponding physical table of the logical table(`logical_or_physical_table_id`) does not exist. - pub async fn get_physical_table_route_with_raw_bytes( - &self, - logical_or_physical_table_id: TableId, - ) -> Result<(TableId, DeserializedValueWithBytes)> { - let table_route = self - .get_with_raw_bytes(logical_or_physical_table_id) - .await? - .context(TableRouteNotFoundSnafu { - table_id: logical_or_physical_table_id, - })?; - - match table_route.get_inner_ref() { - TableRouteValue::Physical(_) => Ok((logical_or_physical_table_id, table_route)), - TableRouteValue::Logical(x) => { - let physical_table_id = x.physical_table_id(); - let physical_table_route = self - .get_with_raw_bytes(physical_table_id) - .await? - .context(TableRouteNotFoundSnafu { - table_id: physical_table_id, - })?; - Ok((physical_table_id, physical_table_route)) - } - } - } - /// Returns batch of [`TableRouteValue`] that respects the order of `table_ids`. 
pub async fn batch_get(&self, table_ids: &[TableId]) -> Result>> { let mut table_routes = self.batch_get_inner(table_ids).await?; diff --git a/src/common/meta/src/rpc/router.rs b/src/common/meta/src/rpc/router.rs index dd7349ae8f79..0e700cc6daaf 100644 --- a/src/common/meta/src/rpc/router.rs +++ b/src/common/meta/src/rpc/router.rs @@ -89,39 +89,6 @@ pub fn convert_to_region_leader_map(region_routes: &[RegionRoute]) -> HashMap>() } -/// Returns the HashMap<[RegionNumber], HashSet> -pub fn convert_to_region_peer_map( - region_routes: &[RegionRoute], -) -> HashMap> { - region_routes - .iter() - .map(|x| { - let set = x - .follower_peers - .iter() - .map(|p| p.id) - .chain(x.leader_peer.as_ref().map(|p| p.id)) - .collect::>(); - - (x.region.id.region_number(), set) - }) - .collect::>() -} - -/// Returns the HashMap<[RegionNumber], [LeaderState]>; -pub fn convert_to_region_leader_state_map( - region_routes: &[RegionRoute], -) -> HashMap { - region_routes - .iter() - .filter_map(|x| { - x.leader_state - .as_ref() - .map(|state| (x.region.id.region_number(), *state)) - }) - .collect::>() -} - pub fn find_region_leader( region_routes: &[RegionRoute], region_number: RegionNumber, @@ -147,19 +114,6 @@ pub fn find_leader_regions(region_routes: &[RegionRoute], datanode: &Peer) -> Ve .collect() } -pub fn extract_all_peers(region_routes: &[RegionRoute]) -> Vec { - let mut peers = region_routes - .iter() - .flat_map(|x| x.leader_peer.iter().chain(x.follower_peers.iter())) - .collect::>() - .into_iter() - .cloned() - .collect::>(); - peers.sort_by_key(|x| x.id); - - peers -} - impl TableRoute { pub fn new(table: Table, region_routes: Vec) -> Self { let region_leaders = region_routes diff --git a/src/common/recordbatch/src/lib.rs b/src/common/recordbatch/src/lib.rs index 257b6f09732a..0281b457495e 100644 --- a/src/common/recordbatch/src/lib.rs +++ b/src/common/recordbatch/src/lib.rs @@ -26,7 +26,6 @@ use std::sync::Arc; use adapter::RecordBatchMetrics; use arc_swap::ArcSwapOption; -use datafusion::physical_plan::memory::MemoryStream; pub use datafusion::physical_plan::SendableRecordBatchStream as DfSendableRecordBatchStream; use datatypes::arrow::compute::SortOptions; pub use datatypes::arrow::record_batch::RecordBatch as DfRecordBatch; @@ -170,19 +169,6 @@ impl RecordBatches { index: 0, }) } - - pub fn into_df_stream(self) -> DfSendableRecordBatchStream { - let df_record_batches = self - .batches - .into_iter() - .map(|batch| batch.into_df_record_batch()) - .collect(); - // unwrap safety: `MemoryStream::try_new` won't fail - Box::pin( - MemoryStream::try_new(df_record_batches, self.schema.arrow_schema().clone(), None) - .unwrap(), - ) - } } impl IntoIterator for RecordBatches { diff --git a/src/common/time/src/util.rs b/src/common/time/src/util.rs index 19fe3bc9119e..ccb9e1bdd0a3 100644 --- a/src/common/time/src/util.rs +++ b/src/common/time/src/util.rs @@ -29,10 +29,6 @@ pub fn format_utc_datetime(utc: &NaiveDateTime, pattern: &str) -> String { } } -pub fn system_datetime_to_utc(local: &NaiveDateTime) -> LocalResult { - datetime_to_utc(local, get_timezone(None)) -} - /// Cast a [`NaiveDateTime`] with the given timezone. 
pub fn datetime_to_utc( datetime: &NaiveDateTime, diff --git a/src/flow/src/adapter.rs b/src/flow/src/adapter.rs index 80d03e27706b..7d9ae5e422d2 100644 --- a/src/flow/src/adapter.rs +++ b/src/flow/src/adapter.rs @@ -206,28 +206,6 @@ impl DiffRequest { } } -/// iterate through the diff row and form continuous diff row with same diff type -pub fn diff_row_to_request(rows: Vec) -> Vec { - let mut reqs = Vec::new(); - for (row, ts, diff) in rows { - let last = reqs.last_mut(); - match (last, diff) { - (Some(DiffRequest::Insert(rows)), 1) => { - rows.push((row, ts)); - } - (Some(DiffRequest::Insert(_)), -1) => reqs.push(DiffRequest::Delete(vec![(row, ts)])), - (Some(DiffRequest::Delete(rows)), -1) => { - rows.push((row, ts)); - } - (Some(DiffRequest::Delete(_)), 1) => reqs.push(DiffRequest::Insert(vec![(row, ts)])), - (None, 1) => reqs.push(DiffRequest::Insert(vec![(row, ts)])), - (None, -1) => reqs.push(DiffRequest::Delete(vec![(row, ts)])), - _ => {} - } - } - reqs -} - pub fn batches_to_rows_req(batches: Vec) -> Result, Error> { let mut reqs = Vec::new(); for batch in batches { diff --git a/src/flow/src/compute/render/src_sink.rs b/src/flow/src/compute/render/src_sink.rs index 62e733420b3c..cc8cf01ff7f3 100644 --- a/src/flow/src/compute/render/src_sink.rs +++ b/src/flow/src/compute/render/src_sink.rs @@ -14,7 +14,7 @@ //! Source and Sink for the dataflow -use std::collections::{BTreeMap, VecDeque}; +use std::collections::BTreeMap; use common_telemetry::{debug, trace}; use hydroflow::scheduled::graph_ext::GraphExt; @@ -28,7 +28,7 @@ use crate::compute::types::{Arranged, Collection, CollectionBundle, Toff}; use crate::error::{Error, PlanSnafu}; use crate::expr::error::InternalSnafu; use crate::expr::{Batch, EvalError}; -use crate::repr::{DiffRow, Row, BROADCAST_CAP}; +use crate::repr::{DiffRow, Row}; #[allow(clippy::mutable_key_type)] impl Context<'_, '_> { @@ -242,44 +242,4 @@ impl Context<'_, '_> { }, ); } - - /// Render a sink which send updates to broadcast channel, have internal buffer in case broadcast channel is full - pub fn render_sink(&mut self, bundle: CollectionBundle, sender: broadcast::Sender) { - let CollectionBundle { - collection, - arranged: _, - } = bundle; - let mut buf = VecDeque::with_capacity(1000); - - let schd = self.compute_state.get_scheduler(); - let inner_schd = schd.clone(); - let now = self.compute_state.current_time_ref(); - - let sink = self - .df - .add_subgraph_sink("Sink", collection.into_inner(), move |_ctx, recv| { - let data = recv.take_inner(); - buf.extend(data.into_iter().flat_map(|i| i.into_iter())); - if sender.len() >= BROADCAST_CAP { - return; - } else { - while let Some(row) = buf.pop_front() { - // if the sender is full, stop sending - if sender.len() >= BROADCAST_CAP { - break; - } - // TODO(discord9): handling tokio broadcast error - let _ = sender.send(row); - } - } - - // if buffer is not empty, schedule the next run at next tick - // so the buffer can be drained as soon as possible - if !buf.is_empty() { - inner_schd.schedule_at(*now.borrow() + 1); - } - }); - - schd.set_cur_subgraph(sink); - } } diff --git a/src/flow/src/compute/types.rs b/src/flow/src/compute/types.rs index 00ed660a6ef0..e125a2d27261 100644 --- a/src/flow/src/compute/types.rs +++ b/src/flow/src/compute/types.rs @@ -82,22 +82,6 @@ impl Arranged { writer: self.writer.clone(), }) } - - /// Copy the full arrangement, including the future and the current updates. 
- /// - /// Internally `Rc-ed` so it's cheap to copy - pub fn try_copy_full(&self) -> Option { - self.arrangement - .clone_full_arrange() - .map(|arrangement| Arranged { - arrangement, - readers: self.readers.clone(), - writer: self.writer.clone(), - }) - } - pub fn add_reader(&self, id: SubgraphId) { - self.readers.borrow_mut().push(id) - } } /// A bundle of the various ways a collection can be represented. diff --git a/src/flow/src/expr/error.rs b/src/flow/src/expr/error.rs index 4b69b3df235e..992d5c592125 100644 --- a/src/flow/src/expr/error.rs +++ b/src/flow/src/expr/error.rs @@ -21,11 +21,6 @@ use datafusion_common::DataFusionError; use datatypes::data_type::ConcreteDataType; use snafu::{Location, Snafu}; -fn is_send_sync() { - fn check() {} - check::(); -} - /// EvalError is about errors happen on columnar evaluation /// /// TODO(discord9): add detailed location of column/operator(instead of code) to errors tp help identify related column diff --git a/src/flow/src/expr/linear.rs b/src/flow/src/expr/linear.rs index 8e220f7d86a2..373e467aba1b 100644 --- a/src/flow/src/expr/linear.rs +++ b/src/flow/src/expr/linear.rs @@ -359,14 +359,6 @@ impl MapFilterProject { ) } - /// Convert the `MapFilterProject` into a staged evaluation plan. - /// - /// The main behavior is extract temporal predicates, which cannot be evaluated - /// using the standard machinery. - pub fn into_plan(self) -> Result { - MfpPlan::create_from(self) - } - /// Lists input columns whose values are used in outputs. /// /// It is entirely appropriate to determine the demand of an instance @@ -602,26 +594,6 @@ impl SafeMfpPlan { } } - /// A version of `evaluate` which produces an iterator over `Datum` - /// as output. - /// - /// This version can be useful when one wants to capture the resulting - /// datums without packing and then unpacking a row. - #[inline(always)] - pub fn evaluate_iter<'a>( - &'a self, - datums: &'a mut Vec, - ) -> Result + 'a>, EvalError> { - let passed_predicates = self.evaluate_inner(datums)?; - if !passed_predicates { - Ok(None) - } else { - Ok(Some( - self.mfp.projection.iter().map(move |i| datums[*i].clone()), - )) - } - } - /// Populates `values` with `self.expressions` and tests `self.predicates`. /// /// This does not apply `self.projection`, which is up to the calling method. diff --git a/src/flow/src/plan.rs b/src/flow/src/plan.rs index dc86b984ed23..e1cf22e621ec 100644 --- a/src/flow/src/plan.rs +++ b/src/flow/src/plan.rs @@ -18,10 +18,8 @@ mod join; mod reduce; -use std::collections::BTreeSet; - use crate::error::Error; -use crate::expr::{GlobalId, Id, LocalId, MapFilterProject, SafeMfpPlan, TypedExpr}; +use crate::expr::{Id, LocalId, MapFilterProject, SafeMfpPlan, TypedExpr}; use crate::plan::join::JoinPlan; pub(crate) use crate::plan::reduce::{AccumulablePlan, AggrWithIndex, KeyValPlan, ReducePlan}; use crate::repr::{DiffRow, RelationDesc}; @@ -186,48 +184,6 @@ pub enum Plan { }, } -impl Plan { - /// Find all the used collection in the plan - pub fn find_used_collection(&self) -> BTreeSet { - fn recur_find_use(plan: &Plan, used: &mut BTreeSet) { - match plan { - Plan::Get { id } => { - match id { - Id::Local(_) => (), - Id::Global(g) => { - used.insert(*g); - } - }; - } - Plan::Let { value, body, .. } => { - recur_find_use(&value.plan, used); - recur_find_use(&body.plan, used); - } - Plan::Mfp { input, .. } => { - recur_find_use(&input.plan, used); - } - Plan::Reduce { input, .. } => { - recur_find_use(&input.plan, used); - } - Plan::Join { inputs, .. 
} => { - for input in inputs { - recur_find_use(&input.plan, used); - } - } - Plan::Union { inputs, .. } => { - for input in inputs { - recur_find_use(&input.plan, used); - } - } - _ => {} - } - } - let mut ret = Default::default(); - recur_find_use(self, &mut ret); - ret - } -} - impl Plan { pub fn with_types(self, schema: RelationDesc) -> TypedPlan { TypedPlan { schema, plan: self } diff --git a/src/flow/src/repr/relation.rs b/src/flow/src/repr/relation.rs index 54ad1c5e8ec4..d0fbb861eb24 100644 --- a/src/flow/src/repr/relation.rs +++ b/src/flow/src/repr/relation.rs @@ -46,14 +46,6 @@ impl Key { self.column_indices.push(col); } - /// Add columns to Key - pub fn add_cols(&mut self, cols: I) - where - I: IntoIterator, - { - self.column_indices.extend(cols); - } - /// Remove a column from Key pub fn remove_col(&mut self, col: usize) { self.column_indices.retain(|&r| r != col); diff --git a/src/meta-srv/src/metasrv.rs b/src/meta-srv/src/metasrv.rs index da614ac9b943..c7dcd81e9f09 100644 --- a/src/meta-srv/src/metasrv.rs +++ b/src/meta-srv/src/metasrv.rs @@ -204,10 +204,6 @@ impl Context { pub fn reset_in_memory(&self) { self.in_memory.reset(); } - - pub fn reset_leader_cached_kv_backend(&self) { - self.leader_cached_kv_backend.reset(); - } } /// The value of the leader. It is used to store the leader's address. diff --git a/src/meta-srv/src/mocks.rs b/src/meta-srv/src/mocks.rs index cf9144dc3900..9611fcdd13df 100644 --- a/src/meta-srv/src/mocks.rs +++ b/src/meta-srv/src/mocks.rs @@ -52,11 +52,6 @@ pub async fn mock_with_etcdstore(addr: &str) -> MockInfo { mock(Default::default(), kv_backend, None, None, None).await } -pub async fn mock_with_memstore_and_selector(selector: SelectorRef) -> MockInfo { - let kv_backend = Arc::new(MemoryKvBackend::new()); - mock(Default::default(), kv_backend, Some(selector), None, None).await -} - pub async fn mock( opts: MetasrvOptions, kv_backend: KvBackendRef, diff --git a/src/meta-srv/src/procedure/region_migration.rs b/src/meta-srv/src/procedure/region_migration.rs index 40df9401cb24..1baa0c04d4a1 100644 --- a/src/meta-srv/src/procedure/region_migration.rs +++ b/src/meta-srv/src/procedure/region_migration.rs @@ -364,12 +364,6 @@ impl Context { Ok(datanode_value.as_ref().unwrap()) } - /// Removes the `table_info` of [VolatileContext], returns true if any. - pub fn remove_table_info_value(&mut self) -> bool { - let value = self.volatile_ctx.table_info.take(); - value.is_some() - } - /// Returns the [RegionId]. 
pub fn region_id(&self) -> RegionId { self.persistent_ctx.region_id diff --git a/src/script/src/python/ffi_types/copr.rs b/src/script/src/python/ffi_types/copr.rs index 1a9a88466b59..e0037550a649 100644 --- a/src/script/src/python/ffi_types/copr.rs +++ b/src/script/src/python/ffi_types/copr.rs @@ -499,26 +499,6 @@ pub fn exec_parsed( } } -/// execute script just like [`exec_coprocessor`] do, -/// but instead of return a internal [`Error`] type, -/// return a friendly String format of error -/// -/// use `ln_offset` and `filename` to offset line number and mark file name in error prompt -#[cfg(test)] -#[allow(dead_code)] -pub fn exec_copr_print( - script: &str, - rb: &Option, - ln_offset: usize, - filename: &str, - eval_ctx: &EvalContext, -) -> StdResult { - let res = exec_coprocessor(script, rb, eval_ctx); - res.map_err(|e| { - crate::python::error::pretty_print_error_in_src(script, &e, ln_offset, filename) - }) -} - #[cfg(test)] mod tests { use crate::python::ffi_types::copr::parse::parse_and_compile_copr; diff --git a/src/session/src/lib.rs b/src/session/src/lib.rs index f553fef58c42..c018d47ebced 100644 --- a/src/session/src/lib.rs +++ b/src/session/src/lib.rs @@ -97,10 +97,6 @@ impl Session { &self.conn_info } - pub fn mut_conn_info(&mut self) -> &mut ConnInfo { - &mut self.conn_info - } - pub fn timezone(&self) -> Timezone { self.mutable_inner.read().unwrap().timezone.clone() } diff --git a/src/sql/src/statements.rs b/src/sql/src/statements.rs index 00196ed5313b..90db401cbaa6 100644 --- a/src/sql/src/statements.rs +++ b/src/sql/src/statements.rs @@ -34,10 +34,8 @@ pub mod truncate; use std::str::FromStr; use api::helper::ColumnDataTypeWrapper; -use api::v1::add_column_location::LocationType; -use api::v1::{AddColumnLocation as Location, SemanticType}; +use api::v1::SemanticType; use common_base::bytes::Bytes; -use common_query::AddColumnLocation; use common_time::timezone::Timezone; use common_time::Timestamp; use datatypes::prelude::ConcreteDataType; @@ -688,22 +686,6 @@ pub fn concrete_data_type_to_sql_data_type(data_type: &ConcreteDataType) -> Resu } } -pub fn sql_location_to_grpc_add_column_location( - location: &Option, -) -> Option { - match location { - Some(AddColumnLocation::First) => Some(Location { - location_type: LocationType::First.into(), - after_column_name: String::default(), - }), - Some(AddColumnLocation::After { column_name }) => Some(Location { - location_type: LocationType::After.into(), - after_column_name: column_name.to_string(), - }), - None => None, - } -} - #[cfg(test)] mod tests { use std::assert_matches::assert_matches; From 2107737db196561b6453ac148c7afb6a1550eaf7 Mon Sep 17 00:00:00 2001 From: Ning Sun Date: Wed, 18 Dec 2024 20:41:24 +0800 Subject: [PATCH 49/59] chore: make nix compilation environment config more robust (#5183) * chore: improve nix-shell support * fix: add pkg-config * ci: add a github action to ensure build on clean system * ci: optimise dependencies of task * ci: move clean build to nightly --- .github/workflows/nightly-ci.yml | 11 +++++++++++ rust-toolchain.toml | 1 + shell.nix | 15 ++++++++++----- 3 files changed, 22 insertions(+), 5 deletions(-) diff --git a/.github/workflows/nightly-ci.yml b/.github/workflows/nightly-ci.yml index b6ff247ffb9e..285fb61a7cb7 100644 --- a/.github/workflows/nightly-ci.yml +++ b/.github/workflows/nightly-ci.yml @@ -114,6 +114,17 @@ jobs: GT_S3_REGION: ${{ vars.AWS_CI_TEST_BUCKET_REGION }} UNITTEST_LOG_DIR: "__unittest_logs" + cleanbuild-linux-nix: + runs-on: ubuntu-latest-8-cores + timeout-minutes: 60 
+ needs: [coverage, fmt, clippy, check] + steps: + - uses: actions/checkout@v4 + - uses: cachix/install-nix-action@v27 + with: + nix_path: nixpkgs=channel:nixos-unstable + - run: nix-shell --pure --run "cargo build" + check-status: name: Check status needs: [sqlness-test, sqlness-windows, test-on-windows] diff --git a/rust-toolchain.toml b/rust-toolchain.toml index c986eedd9716..d12222a5d332 100644 --- a/rust-toolchain.toml +++ b/rust-toolchain.toml @@ -1,2 +1,3 @@ [toolchain] channel = "nightly-2024-10-19" +components = ["rust-analyzer"] diff --git a/shell.nix b/shell.nix index b255fe845c76..ce84a032764d 100644 --- a/shell.nix +++ b/shell.nix @@ -4,19 +4,24 @@ let pkgs = import nixpkgs { config = {}; overlays = []; }; in -pkgs.mkShellNoCC { - packages = with pkgs; [ +pkgs.mkShell rec { + nativeBuildInputs = with pkgs; [ + pkg-config git clang gcc - mold - libgit2 protobuf + mold (fenix.fromToolchainFile { dir = ./.; }) - fenix.rust-analyzer cargo-nextest + taplo + ]; + + buildInputs = with pkgs; [ + libgit2 ]; + LD_LIBRARY_PATH = pkgs.lib.makeLibraryPath buildInputs; } From c9ad8c7101a182d79b5f255e28c3886d6609d12c Mon Sep 17 00:00:00 2001 From: Ning Sun Date: Wed, 18 Dec 2024 23:15:55 +0800 Subject: [PATCH 50/59] feat: show create postgresql foreign table (#5143) * feat: add show create table for pg in parser * feat: implement show create table operation * fix: adopt upstream changes --- src/datatypes/src/data_type.rs | 45 +++++++++++++++++ src/operator/src/statement.rs | 13 ++++- src/operator/src/statement/show.rs | 20 ++++++++ src/query/src/sql.rs | 48 +++++++++++++++++++ src/sql/src/parsers/show_parser.rs | 20 ++++++-- src/sql/src/statements/show.rs | 46 +++++++++++++++++- .../standalone/common/show/show_create.result | 30 ++++++++++++ .../standalone/common/show/show_create.sql | 4 ++ 8 files changed, 219 insertions(+), 7 deletions(-) diff --git a/src/datatypes/src/data_type.rs b/src/datatypes/src/data_type.rs index 8f81a0c86f76..b3342cc6f525 100644 --- a/src/datatypes/src/data_type.rs +++ b/src/datatypes/src/data_type.rs @@ -370,6 +370,51 @@ impl ConcreteDataType { _ => None, } } + + /// Return the datatype name in postgres type system + pub fn postgres_datatype_name(&self) -> &'static str { + match self { + &ConcreteDataType::Null(_) => "UNKNOWN", + &ConcreteDataType::Boolean(_) => "BOOL", + &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "CHAR", + &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "INT2", + &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "INT4", + &ConcreteDataType::Int64(_) | &ConcreteDataType::UInt64(_) => "INT8", + &ConcreteDataType::Float32(_) => "FLOAT4", + &ConcreteDataType::Float64(_) => "FLOAT8", + &ConcreteDataType::Binary(_) | &ConcreteDataType::Vector(_) => "BYTEA", + &ConcreteDataType::String(_) => "VARCHAR", + &ConcreteDataType::Date(_) => "DATE", + &ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => "TIMESTAMP", + &ConcreteDataType::Time(_) => "TIME", + &ConcreteDataType::Interval(_) => "INTERVAL", + &ConcreteDataType::Decimal128(_) => "NUMERIC", + &ConcreteDataType::Json(_) => "JSON", + ConcreteDataType::List(list) => match list.item_type() { + &ConcreteDataType::Null(_) => "UNKNOWN", + &ConcreteDataType::Boolean(_) => "_BOOL", + &ConcreteDataType::Int8(_) | &ConcreteDataType::UInt8(_) => "_CHAR", + &ConcreteDataType::Int16(_) | &ConcreteDataType::UInt16(_) => "_INT2", + &ConcreteDataType::Int32(_) | &ConcreteDataType::UInt32(_) => "_INT4", + &ConcreteDataType::Int64(_) | 
&ConcreteDataType::UInt64(_) => "_INT8", + &ConcreteDataType::Float32(_) => "_FLOAT4", + &ConcreteDataType::Float64(_) => "_FLOAT8", + &ConcreteDataType::Binary(_) => "_BYTEA", + &ConcreteDataType::String(_) => "_VARCHAR", + &ConcreteDataType::Date(_) => "_DATE", + &ConcreteDataType::DateTime(_) | &ConcreteDataType::Timestamp(_) => "_TIMESTAMP", + &ConcreteDataType::Time(_) => "_TIME", + &ConcreteDataType::Interval(_) => "_INTERVAL", + &ConcreteDataType::Decimal128(_) => "_NUMERIC", + &ConcreteDataType::Json(_) => "_JSON", + &ConcreteDataType::Duration(_) + | &ConcreteDataType::Dictionary(_) + | &ConcreteDataType::Vector(_) + | &ConcreteDataType::List(_) => "UNKNOWN", + }, + &ConcreteDataType::Duration(_) | &ConcreteDataType::Dictionary(_) => "UNKNOWN", + } + } } impl From<&ConcreteDataType> for ConcreteDataType { diff --git a/src/operator/src/statement.rs b/src/operator/src/statement.rs index b3251ca6bf2c..ad842a40fe28 100644 --- a/src/operator/src/statement.rs +++ b/src/operator/src/statement.rs @@ -59,6 +59,7 @@ use set::set_query_timeout; use snafu::{ensure, OptionExt, ResultExt}; use sql::statements::copy::{CopyDatabase, CopyDatabaseArgument, CopyTable, CopyTableArgument}; use sql::statements::set_variables::SetVariables; +use sql::statements::show::ShowCreateTableVariant; use sql::statements::statement::Statement; use sql::statements::OptionMap; use sql::util::format_raw_object_name; @@ -317,8 +318,16 @@ impl StatementExecutor { .context(TableNotFoundSnafu { table_name: &table })?; let table_name = TableName::new(catalog, schema, table); - self.show_create_table(table_name, table_ref, query_ctx) - .await + match show.variant { + ShowCreateTableVariant::Original => { + self.show_create_table(table_name, table_ref, query_ctx) + .await + } + ShowCreateTableVariant::PostgresForeignTable => { + self.show_create_table_for_pg(table_name, table_ref, query_ctx) + .await + } + } } Statement::ShowCreateFlow(show) => self.show_create_flow(show, query_ctx).await, Statement::ShowCreateView(show) => self.show_create_view(show, query_ctx).await, diff --git a/src/operator/src/statement/show.rs b/src/operator/src/statement/show.rs index 210ec4e7f28f..fe91c71abe24 100644 --- a/src/operator/src/statement/show.rs +++ b/src/operator/src/statement/show.rs @@ -144,6 +144,26 @@ impl StatementExecutor { .context(ExecuteStatementSnafu) } + #[tracing::instrument(skip_all)] + pub async fn show_create_table_for_pg( + &self, + table_name: TableName, + table: TableRef, + query_ctx: QueryContextRef, + ) -> Result { + let table_info = table.table_info(); + if table_info.table_type != TableType::Base { + return error::ShowCreateTableBaseOnlySnafu { + table_name: table_name.to_string(), + table_type: table_info.table_type, + } + .fail(); + } + + query::sql::show_create_foreign_table_for_pg(table, query_ctx) + .context(ExecuteStatementSnafu) + } + #[tracing::instrument(skip_all)] pub async fn show_create_view( &self, diff --git a/src/query/src/sql.rs b/src/query/src/sql.rs index 3337503d097c..7525bb904bc5 100644 --- a/src/query/src/sql.rs +++ b/src/query/src/sql.rs @@ -45,6 +45,7 @@ use datafusion_expr::{case, col, lit, Expr}; use datatypes::prelude::*; use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, RawSchema, Schema}; use datatypes::vectors::StringVector; +use itertools::Itertools; use object_store::ObjectStore; use once_cell::sync::Lazy; use regex::Regex; @@ -61,6 +62,7 @@ use sql::statements::show::{ use sql::statements::statement::Statement; use sql::statements::OptionMap; use 
sqlparser::ast::ObjectName; +use store_api::metric_engine_consts::{is_metric_engine, is_metric_engine_internal_column}; use table::requests::{FILE_TABLE_LOCATION_KEY, FILE_TABLE_PATTERN_KEY}; use table::TableRef; @@ -763,6 +765,52 @@ pub fn show_create_table( Ok(Output::new_with_record_batches(records)) } +pub fn show_create_foreign_table_for_pg( + table: TableRef, + _query_ctx: QueryContextRef, +) -> Result { + let table_info = table.table_info(); + + let table_meta = &table_info.meta; + let table_name = &table_info.name; + let schema = &table_info.meta.schema; + let is_metric_engine = is_metric_engine(&table_meta.engine); + + let columns = schema + .column_schemas() + .iter() + .filter_map(|c| { + if is_metric_engine && is_metric_engine_internal_column(&c.name) { + None + } else { + Some(format!( + "\"{}\" {}", + c.name, + c.data_type.postgres_datatype_name() + )) + } + }) + .join(",\n "); + + let sql = format!( + r#"CREATE FOREIGN TABLE ft_{} ( + {} +) +SERVER greptimedb +OPTIONS (table_name '{}')"#, + table_name, columns, table_name + ); + + let columns = vec![ + Arc::new(StringVector::from(vec![table_name.clone()])) as _, + Arc::new(StringVector::from(vec![sql])) as _, + ]; + let records = RecordBatches::try_from_columns(SHOW_CREATE_TABLE_OUTPUT_SCHEMA.clone(), columns) + .context(error::CreateRecordBatchSnafu)?; + + Ok(Output::new_with_record_batches(records)) +} + pub fn show_create_view( view_name: ObjectName, definition: &str, diff --git a/src/sql/src/parsers/show_parser.rs b/src/sql/src/parsers/show_parser.rs index d1530c1fcbbf..fa31e813f3d6 100644 --- a/src/sql/src/parsers/show_parser.rs +++ b/src/sql/src/parsers/show_parser.rs @@ -21,9 +21,9 @@ use crate::error::{ }; use crate::parser::ParserContext; use crate::statements::show::{ - ShowColumns, ShowCreateDatabase, ShowCreateFlow, ShowCreateTable, ShowCreateView, - ShowDatabases, ShowFlows, ShowIndex, ShowKind, ShowStatus, ShowTableStatus, ShowTables, - ShowVariables, ShowViews, + ShowColumns, ShowCreateDatabase, ShowCreateFlow, ShowCreateTable, ShowCreateTableVariant, + ShowCreateView, ShowDatabases, ShowFlows, ShowIndex, ShowKind, ShowStatus, ShowTableStatus, + ShowTables, ShowVariables, ShowViews, }; use crate::statements::statement::Statement; @@ -146,7 +146,19 @@ impl ParserContext<'_> { name: table_name.to_string(), } ); - Ok(Statement::ShowCreateTable(ShowCreateTable { table_name })) + let mut variant = ShowCreateTableVariant::Original; + if self.consume_token("FOR") { + if self.consume_token("POSTGRES_FOREIGN_TABLE") { + variant = ShowCreateTableVariant::PostgresForeignTable; + } else { + self.unsupported(self.peek_token_as_string())?; + } + } + + Ok(Statement::ShowCreateTable(ShowCreateTable { + table_name, + variant, + })) } fn parse_show_create_flow(&mut self) -> Result { diff --git a/src/sql/src/statements/show.rs b/src/sql/src/statements/show.rs index 055cd7768f02..92f13422e6ef 100644 --- a/src/sql/src/statements/show.rs +++ b/src/sql/src/statements/show.rs @@ -179,12 +179,26 @@ impl Display for ShowCreateDatabase { #[derive(Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] pub struct ShowCreateTable { pub table_name: ObjectName, + pub variant: ShowCreateTableVariant, +} + +/// Variant of a show create table +#[derive(Default, Debug, Clone, PartialEq, Eq, Visit, VisitMut, Serialize)] +pub enum ShowCreateTableVariant { + #[default] + Original, + PostgresForeignTable, } impl Display for ShowCreateTable { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { let table_name = &self.table_name; - write!(f, 
r#"SHOW CREATE TABLE {table_name}"#) + write!(f, r#"SHOW CREATE TABLE {table_name}"#)?; + if let ShowCreateTableVariant::PostgresForeignTable = self.variant { + write!(f, " FOR POSTGRES_FOREIGN_TABLE")?; + } + + Ok(()) } } @@ -344,12 +358,31 @@ mod tests { Statement::ShowCreateTable(show) => { let table_name = show.table_name.to_string(); assert_eq!(table_name, "test"); + assert_eq!(show.variant, ShowCreateTableVariant::Original); + } + _ => { + unreachable!(); + } + } + + let sql = "SHOW CREATE TABLE test FOR POSTGRES_FOREIGN_TABLE"; + let stmts: Vec = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()) + .unwrap(); + assert_eq!(1, stmts.len()); + assert_matches!(&stmts[0], Statement::ShowCreateTable { .. }); + match &stmts[0] { + Statement::ShowCreateTable(show) => { + let table_name = show.table_name.to_string(); + assert_eq!(table_name, "test"); + assert_eq!(show.variant, ShowCreateTableVariant::PostgresForeignTable); } _ => { unreachable!(); } } } + #[test] pub fn test_show_create_missing_table_name() { let sql = "SHOW CREATE TABLE"; @@ -361,6 +394,17 @@ mod tests { .is_err()); } + #[test] + pub fn test_show_create_unknown_for() { + let sql = "SHOW CREATE TABLE t FOR UNKNOWN"; + assert!(ParserContext::create_with_dialect( + sql, + &GreptimeDbDialect {}, + ParseOptions::default() + ) + .is_err()); + } + #[test] pub fn test_show_create_flow() { let sql = "SHOW CREATE FLOW test"; diff --git a/tests/cases/standalone/common/show/show_create.result b/tests/cases/standalone/common/show/show_create.result index ec692c0f293a..85536954d435 100644 --- a/tests/cases/standalone/common/show/show_create.result +++ b/tests/cases/standalone/common/show/show_create.result @@ -46,6 +46,22 @@ SHOW CREATE TABLE system_metrics; | | ) | +----------------+-----------------------------------------------------------+ +SHOW CREATE TABLE system_metrics FOR POSTGRES_FOREIGN_TABLE; + ++----------------+------------------------------------------+ +| Table | Create Table | ++----------------+------------------------------------------+ +| system_metrics | CREATE FOREIGN TABLE ft_system_metrics ( | +| | "id" INT4, | +| | "host" VARCHAR, | +| | "cpu" FLOAT8, | +| | "disk" FLOAT4, | +| | "ts" TIMESTAMP | +| | ) | +| | SERVER greptimedb | +| | OPTIONS (table_name 'system_metrics') | ++----------------+------------------------------------------+ + DROP TABLE system_metrics; Affected Rows: 0 @@ -141,6 +157,20 @@ show create table t1; | | ) | +-------+-----------------------------------+ +SHOW CREATE TABLE t1 FOR POSTGRES_FOREIGN_TABLE; + ++-------+------------------------------+ +| Table | Create Table | ++-------+------------------------------+ +| t1 | CREATE FOREIGN TABLE ft_t1 ( | +| | "host" VARCHAR, | +| | "ts" TIMESTAMP, | +| | "val" FLOAT8 | +| | ) | +| | SERVER greptimedb | +| | OPTIONS (table_name 't1') | ++-------+------------------------------+ + drop table t1; Affected Rows: 0 diff --git a/tests/cases/standalone/common/show/show_create.sql b/tests/cases/standalone/common/show/show_create.sql index 45c8f7a3ef4c..5289df6e76f5 100644 --- a/tests/cases/standalone/common/show/show_create.sql +++ b/tests/cases/standalone/common/show/show_create.sql @@ -20,6 +20,8 @@ WITH( SHOW CREATE TABLE system_metrics; +SHOW CREATE TABLE system_metrics FOR POSTGRES_FOREIGN_TABLE; + DROP TABLE system_metrics; create table table_without_partition ( @@ -57,6 +59,8 @@ show create table phy; show create table t1; +SHOW CREATE TABLE t1 FOR POSTGRES_FOREIGN_TABLE; + drop table t1; drop table phy; 
From 66f0581f5b42780fd89fc53928e838600b9f8400 Mon Sep 17 00:00:00 2001 From: Weny Xu Date: Thu, 19 Dec 2024 11:29:34 +0800 Subject: [PATCH 51/59] fix: ensure table route metadata is eventually rolled back on failure (#5174) * fix: ensure table route metadata is eventually rolled back on procedure failure * fix(fuzz): enhance procedure condition checking * chore: add logs * feat: close downgraded leader region actively * chore: apply suggestions from CR --- .../src/procedure/region_migration.rs | 76 +++++++++- .../close_downgraded_region.rs | 138 ++++++++++++++++++ .../region_migration/migration_start.rs | 8 +- .../region_migration/open_candidate_region.rs | 7 +- .../procedure/region_migration/test_util.rs | 20 ++- .../region_migration/update_metadata.rs | 6 +- .../upgrade_candidate_region.rs | 9 +- .../upgrade_candidate_region.rs | 4 +- .../migration/fuzz_migrate_metric_regions.rs | 71 ++++----- .../migration/fuzz_migrate_mito_regions.rs | 6 +- 10 files changed, 274 insertions(+), 71 deletions(-) create mode 100644 src/meta-srv/src/procedure/region_migration/close_downgraded_region.rs diff --git a/src/meta-srv/src/procedure/region_migration.rs b/src/meta-srv/src/procedure/region_migration.rs index 1baa0c04d4a1..3b27d33f2270 100644 --- a/src/meta-srv/src/procedure/region_migration.rs +++ b/src/meta-srv/src/procedure/region_migration.rs @@ -12,6 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. +pub(crate) mod close_downgraded_region; pub(crate) mod downgrade_leader_region; pub(crate) mod manager; pub(crate) mod migration_abort; @@ -43,6 +44,7 @@ use common_procedure::error::{ Error as ProcedureError, FromJsonSnafu, Result as ProcedureResult, ToJsonSnafu, }; use common_procedure::{Context as ProcedureContext, LockKey, Procedure, Status, StringKey}; +use common_telemetry::info; use manager::RegionMigrationProcedureGuard; pub use manager::{ RegionMigrationManagerRef, RegionMigrationProcedureTask, RegionMigrationProcedureTracker, @@ -91,7 +93,9 @@ impl PersistentContext { let lock_key = vec![ CatalogLock::Read(&self.catalog).into(), SchemaLock::read(&self.catalog, &self.schema).into(), - TableLock::Read(region_id.table_id()).into(), + // The optimistic updating of table route is not working very well, + // so we need to use the write lock here. + TableLock::Write(region_id.table_id()).into(), RegionLock::Write(region_id).into(), ]; @@ -253,7 +257,7 @@ impl Context { .await .context(error::TableMetadataManagerSnafu) .map_err(BoxedError::new) - .context(error::RetryLaterWithSourceSnafu { + .with_context(|_| error::RetryLaterWithSourceSnafu { reason: format!("Failed to get TableRoute: {table_id}"), })? .context(error::TableRouteNotFoundSnafu { table_id })?; @@ -317,7 +321,7 @@ impl Context { .await .context(error::TableMetadataManagerSnafu) .map_err(BoxedError::new) - .context(error::RetryLaterWithSourceSnafu { + .with_context(|_| error::RetryLaterWithSourceSnafu { reason: format!("Failed to get TableInfo: {table_id}"), })? .context(error::TableInfoNotFoundSnafu { table_id })?; @@ -350,7 +354,7 @@ impl Context { .await .context(error::TableMetadataManagerSnafu) .map_err(BoxedError::new) - .context(error::RetryLaterWithSourceSnafu { + .with_context(|_| error::RetryLaterWithSourceSnafu { reason: format!("Failed to get DatanodeTable: ({datanode_id},{table_id})"), })? 
.context(error::DatanodeTableNotFoundSnafu { @@ -468,6 +472,48 @@ impl RegionMigrationProcedure { _guard: guard, }) } + + async fn rollback_inner(&mut self) -> Result<()> { + let _timer = METRIC_META_REGION_MIGRATION_EXECUTE + .with_label_values(&["rollback"]) + .start_timer(); + + let table_id = self.context.region_id().table_id(); + let region_id = self.context.region_id(); + self.context.remove_table_route_value(); + let table_metadata_manager = self.context.table_metadata_manager.clone(); + let table_route = self.context.get_table_route_value().await?; + + // Safety: It must be a physical table route. + let downgraded = table_route + .region_routes() + .unwrap() + .iter() + .filter(|route| route.region.id == region_id) + .any(|route| route.is_leader_downgrading()); + + if downgraded { + info!("Rollbacking downgraded region leader table route, region: {region_id}"); + table_metadata_manager + .update_leader_region_status(table_id, table_route, |route| { + if route.region.id == region_id { + Some(None) + } else { + None + } + }) + .await + .context(error::TableMetadataManagerSnafu) + .map_err(BoxedError::new) + .with_context(|_| error::RetryLaterWithSourceSnafu { + reason: format!("Failed to update the table route during the rollback downgraded leader region: {region_id}"), + })?; + } + + self.context.register_failure_detectors().await; + + Ok(()) + } } #[async_trait::async_trait] @@ -476,6 +522,16 @@ impl Procedure for RegionMigrationProcedure { Self::TYPE_NAME } + async fn rollback(&mut self, _ctx: &ProcedureContext) -> ProcedureResult<()> { + self.rollback_inner() + .await + .map_err(ProcedureError::external) + } + + fn rollback_supported(&self) -> bool { + true + } + async fn execute(&mut self, _ctx: &ProcedureContext) -> ProcedureResult { let state = &mut self.state; @@ -701,6 +757,12 @@ mod tests { Assertion::simple(assert_update_metadata_upgrade, assert_no_persist), ), // UpdateMetadata::Upgrade + Step::next( + "Should be the close downgraded region", + None, + Assertion::simple(assert_close_downgraded_region, assert_no_persist), + ), + // CloseDowngradedRegion Step::next( "Should be the region migration end", None, @@ -1071,6 +1133,12 @@ mod tests { Assertion::simple(assert_update_metadata_upgrade, assert_no_persist), ), // UpdateMetadata::Upgrade + Step::next( + "Should be the close downgraded region", + None, + Assertion::simple(assert_close_downgraded_region, assert_no_persist), + ), + // CloseDowngradedRegion Step::next( "Should be the region migration end", None, diff --git a/src/meta-srv/src/procedure/region_migration/close_downgraded_region.rs b/src/meta-srv/src/procedure/region_migration/close_downgraded_region.rs new file mode 100644 index 000000000000..9113607681cc --- /dev/null +++ b/src/meta-srv/src/procedure/region_migration/close_downgraded_region.rs @@ -0,0 +1,138 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::any::Any; +use std::time::Duration; + +use api::v1::meta::MailboxMessage; +use common_meta::distributed_time_constants::MAILBOX_RTT_SECS; +use common_meta::instruction::{Instruction, InstructionReply, SimpleReply}; +use common_meta::key::datanode_table::RegionInfo; +use common_meta::RegionIdent; +use common_procedure::Status; +use common_telemetry::{info, warn}; +use serde::{Deserialize, Serialize}; +use snafu::ResultExt; + +use crate::error::{self, Result}; +use crate::handler::HeartbeatMailbox; +use crate::procedure::region_migration::migration_end::RegionMigrationEnd; +use crate::procedure::region_migration::{Context, State}; +use crate::service::mailbox::Channel; + +const CLOSE_DOWNGRADED_REGION_TIMEOUT: Duration = Duration::from_secs(MAILBOX_RTT_SECS); + +#[derive(Debug, Serialize, Deserialize)] +pub struct CloseDowngradedRegion; + +#[async_trait::async_trait] +#[typetag::serde] +impl State for CloseDowngradedRegion { + async fn next(&mut self, ctx: &mut Context) -> Result<(Box, Status)> { + if let Err(err) = self.close_downgraded_leader_region(ctx).await { + let downgrade_leader_datanode = &ctx.persistent_ctx.from_peer; + let region_id = ctx.region_id(); + warn!(err; "Failed to close downgraded leader region: {region_id} on datanode {:?}", downgrade_leader_datanode); + } + + Ok((Box::new(RegionMigrationEnd), Status::done())) + } + + fn as_any(&self) -> &dyn Any { + self + } +} + +impl CloseDowngradedRegion { + /// Builds close region instruction. + /// + /// Abort(non-retry): + /// - Datanode Table is not found. + async fn build_close_region_instruction(&self, ctx: &mut Context) -> Result { + let pc = &ctx.persistent_ctx; + let downgrade_leader_datanode_id = pc.from_peer.id; + let cluster_id = pc.cluster_id; + let table_id = pc.region_id.table_id(); + let region_number = pc.region_id.region_number(); + let datanode_table_value = ctx.get_from_peer_datanode_table_value().await?; + + let RegionInfo { engine, .. } = datanode_table_value.region_info.clone(); + + Ok(Instruction::CloseRegion(RegionIdent { + cluster_id, + datanode_id: downgrade_leader_datanode_id, + table_id, + region_number, + engine, + })) + } + + /// Closes the downgraded leader region. + async fn close_downgraded_leader_region(&self, ctx: &mut Context) -> Result<()> { + let close_instruction = self.build_close_region_instruction(ctx).await?; + let region_id = ctx.region_id(); + let pc = &ctx.persistent_ctx; + let downgrade_leader_datanode = &pc.from_peer; + let msg = MailboxMessage::json_message( + &format!("Close downgraded region: {}", region_id), + &format!("Meta@{}", ctx.server_addr()), + &format!( + "Datanode-{}@{}", + downgrade_leader_datanode.id, downgrade_leader_datanode.addr + ), + common_time::util::current_time_millis(), + &close_instruction, + ) + .with_context(|_| error::SerializeToJsonSnafu { + input: close_instruction.to_string(), + })?; + + let ch = Channel::Datanode(downgrade_leader_datanode.id); + let receiver = ctx + .mailbox + .send(&ch, msg, CLOSE_DOWNGRADED_REGION_TIMEOUT) + .await?; + + match receiver.await? 
{ + Ok(msg) => { + let reply = HeartbeatMailbox::json_reply(&msg)?; + info!( + "Received close downgraded leader region reply: {:?}, region: {}", + reply, region_id + ); + let InstructionReply::CloseRegion(SimpleReply { result, error }) = reply else { + return error::UnexpectedInstructionReplySnafu { + mailbox_message: msg.to_string(), + reason: "expect close region reply", + } + .fail(); + }; + + if result { + Ok(()) + } else { + error::UnexpectedSnafu { + violated: format!( + "Failed to close downgraded leader region: {region_id} on datanode {:?}, error: {error:?}", + downgrade_leader_datanode, + ), + } + .fail() + } + } + + Err(e) => Err(e), + } + } +} diff --git a/src/meta-srv/src/procedure/region_migration/migration_start.rs b/src/meta-srv/src/procedure/region_migration/migration_start.rs index 3f8103341029..4c097631d35f 100644 --- a/src/meta-srv/src/procedure/region_migration/migration_start.rs +++ b/src/meta-srv/src/procedure/region_migration/migration_start.rs @@ -21,11 +21,11 @@ use serde::{Deserialize, Serialize}; use snafu::{OptionExt, ResultExt}; use store_api::storage::RegionId; -use super::migration_abort::RegionMigrationAbort; -use super::migration_end::RegionMigrationEnd; -use super::open_candidate_region::OpenCandidateRegion; -use super::update_metadata::UpdateMetadata; use crate::error::{self, Result}; +use crate::procedure::region_migration::migration_abort::RegionMigrationAbort; +use crate::procedure::region_migration::migration_end::RegionMigrationEnd; +use crate::procedure::region_migration::open_candidate_region::OpenCandidateRegion; +use crate::procedure::region_migration::update_metadata::UpdateMetadata; use crate::procedure::region_migration::{Context, State}; /// The behaviors: diff --git a/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs b/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs index 22b64b01423f..6a96540b82fb 100644 --- a/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs +++ b/src/meta-srv/src/procedure/region_migration/open_candidate_region.rs @@ -25,9 +25,9 @@ use common_telemetry::info; use serde::{Deserialize, Serialize}; use snafu::{OptionExt, ResultExt}; -use super::update_metadata::UpdateMetadata; use crate::error::{self, Result}; use crate::handler::HeartbeatMailbox; +use crate::procedure::region_migration::update_metadata::UpdateMetadata; use crate::procedure::region_migration::{Context, State}; use crate::service::mailbox::Channel; @@ -145,7 +145,10 @@ impl OpenCandidateRegion { match receiver.await?
{ Ok(msg) => { let reply = HeartbeatMailbox::json_reply(&msg)?; - info!("Received open region reply: {:?}", reply); + info!( + "Received open region reply: {:?}, region: {}", + reply, region_id + ); let InstructionReply::OpenRegion(SimpleReply { result, error }) = reply else { return error::UnexpectedInstructionReplySnafu { mailbox_message: msg.to_string(), diff --git a/src/meta-srv/src/procedure/region_migration/test_util.rs b/src/meta-srv/src/procedure/region_migration/test_util.rs index 2058782396e8..2fe55edcab41 100644 --- a/src/meta-srv/src/procedure/region_migration/test_util.rs +++ b/src/meta-srv/src/procedure/region_migration/test_util.rs @@ -44,19 +44,21 @@ use store_api::storage::RegionId; use table::metadata::RawTableInfo; use tokio::sync::mpsc::{Receiver, Sender}; -use super::manager::RegionMigrationProcedureTracker; -use super::migration_abort::RegionMigrationAbort; -use super::upgrade_candidate_region::UpgradeCandidateRegion; -use super::{Context, ContextFactory, DefaultContextFactory, State, VolatileContext}; use crate::cache_invalidator::MetasrvCacheInvalidator; use crate::error::{self, Error, Result}; use crate::handler::{HeartbeatMailbox, Pusher, Pushers}; use crate::metasrv::MetasrvInfo; +use crate::procedure::region_migration::close_downgraded_region::CloseDowngradedRegion; use crate::procedure::region_migration::downgrade_leader_region::DowngradeLeaderRegion; +use crate::procedure::region_migration::manager::RegionMigrationProcedureTracker; +use crate::procedure::region_migration::migration_abort::RegionMigrationAbort; use crate::procedure::region_migration::migration_end::RegionMigrationEnd; use crate::procedure::region_migration::open_candidate_region::OpenCandidateRegion; use crate::procedure::region_migration::update_metadata::UpdateMetadata; -use crate::procedure::region_migration::PersistentContext; +use crate::procedure::region_migration::upgrade_candidate_region::UpgradeCandidateRegion; +use crate::procedure::region_migration::{ + Context, ContextFactory, DefaultContextFactory, PersistentContext, State, VolatileContext, +}; use crate::service::mailbox::{Channel, MailboxRef}; pub type MockHeartbeatReceiver = Receiver>; @@ -569,6 +571,14 @@ pub(crate) fn assert_region_migration_end(next: &dyn State) { let _ = next.as_any().downcast_ref::().unwrap(); } +/// Asserts the [State] should be [CloseDowngradedRegion]. +pub(crate) fn assert_close_downgraded_region(next: &dyn State) { + let _ = next + .as_any() + .downcast_ref::() + .unwrap(); +} + /// Asserts the [State] should be [RegionMigrationAbort]. 
pub(crate) fn assert_region_migration_abort(next: &dyn State) { let _ = next diff --git a/src/meta-srv/src/procedure/region_migration/update_metadata.rs b/src/meta-srv/src/procedure/region_migration/update_metadata.rs index 180cf31fe1c4..858669ea2136 100644 --- a/src/meta-srv/src/procedure/region_migration/update_metadata.rs +++ b/src/meta-srv/src/procedure/region_migration/update_metadata.rs @@ -22,10 +22,10 @@ use common_procedure::Status; use common_telemetry::warn; use serde::{Deserialize, Serialize}; -use super::migration_abort::RegionMigrationAbort; -use super::migration_end::RegionMigrationEnd; use crate::error::Result; +use crate::procedure::region_migration::close_downgraded_region::CloseDowngradedRegion; use crate::procedure::region_migration::downgrade_leader_region::DowngradeLeaderRegion; +use crate::procedure::region_migration::migration_abort::RegionMigrationAbort; use crate::procedure::region_migration::{Context, State}; #[derive(Debug, Serialize, Deserialize)] @@ -58,7 +58,7 @@ impl State for UpdateMetadata { if let Err(err) = ctx.invalidate_table_cache().await { warn!("Failed to broadcast the invalidate table cache message during the upgrade candidate, error: {err:?}"); }; - Ok((Box::new(RegionMigrationEnd), Status::done())) + Ok((Box::new(CloseDowngradedRegion), Status::executing(false))) } UpdateMetadata::Rollback => { self.rollback_downgraded_region(ctx).await?; diff --git a/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs b/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs index b710a0e1f3e0..c180456bd47b 100644 --- a/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs +++ b/src/meta-srv/src/procedure/region_migration/update_metadata/upgrade_candidate_region.rs @@ -195,7 +195,7 @@ mod tests { use store_api::storage::RegionId; use crate::error::Error; - use crate::procedure::region_migration::migration_end::RegionMigrationEnd; + use crate::procedure::region_migration::close_downgraded_region::CloseDowngradedRegion; use crate::procedure::region_migration::test_util::{self, TestingEnv}; use crate::procedure::region_migration::update_metadata::UpdateMetadata; use crate::procedure::region_migration::{ContextFactory, PersistentContext, State}; @@ -443,7 +443,7 @@ mod tests { } #[tokio::test] - async fn test_next_migration_end_state() { + async fn test_next_close_downgraded_region_state() { let mut state = Box::new(UpdateMetadata::Upgrade); let env = TestingEnv::new(); let persistent_context = new_persistent_context(); @@ -471,7 +471,10 @@ mod tests { let (next, _) = state.next(&mut ctx).await.unwrap(); - let _ = next.as_any().downcast_ref::().unwrap(); + let _ = next + .as_any() + .downcast_ref::() + .unwrap(); let table_route = table_metadata_manager .table_route_manager() diff --git a/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs b/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs index 49100e92f36e..fa989274b44e 100644 --- a/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs +++ b/src/meta-srv/src/procedure/region_migration/upgrade_candidate_region.rs @@ -23,9 +23,9 @@ use serde::{Deserialize, Serialize}; use snafu::{ensure, OptionExt, ResultExt}; use tokio::time::{sleep, Instant}; -use super::update_metadata::UpdateMetadata; use crate::error::{self, Result}; use crate::handler::HeartbeatMailbox; +use crate::procedure::region_migration::update_metadata::UpdateMetadata; use 
crate::procedure::region_migration::{Context, State}; use crate::service::mailbox::Channel; @@ -155,7 +155,7 @@ impl UpgradeCandidateRegion { exists, error::UnexpectedSnafu { violated: format!( - "Expected region {} doesn't exist on datanode {:?}", + "Candidate region {} doesn't exist on datanode {:?}", region_id, candidate ) } diff --git a/tests-fuzz/targets/migration/fuzz_migrate_metric_regions.rs b/tests-fuzz/targets/migration/fuzz_migrate_metric_regions.rs index d4fa4d08fd5e..5bcddea53abf 100644 --- a/tests-fuzz/targets/migration/fuzz_migrate_metric_regions.rs +++ b/tests-fuzz/targets/migration/fuzz_migrate_metric_regions.rs @@ -229,6 +229,29 @@ async fn create_logical_table_and_insert_values( Ok(()) } +async fn wait_for_migration(ctx: &FuzzContext, migration: &Migration, procedure_id: &str) { + info!("Waits for migration: {migration:?}"); + let region_id = migration.region_id.as_u64(); + wait_condition_fn( + Duration::from_secs(120), + || { + let greptime = ctx.greptime.clone(); + let procedure_id = procedure_id.to_string(); + Box::pin(async move { + let output = procedure_state(&greptime, &procedure_id).await; + info!("Checking procedure: {procedure_id}, output: {output}"); + (fetch_partition(&greptime, region_id).await.unwrap(), output) + }) + }, + |(partition, output)| { + info!("Region: {region_id}, datanode: {}", partition.datanode_id); + partition.datanode_id == migration.to_peer && output.contains("Done") + }, + Duration::from_secs(1), + ) + .await; +} + async fn execute_migration(ctx: FuzzContext, input: FuzzInput) -> Result<()> { let mut rng = ChaCha20Rng::seed_from_u64(input.seed); // Creates a physical table. @@ -297,28 +320,7 @@ async fn execute_migration(ctx: FuzzContext, input: FuzzInput) -> Result<()> { } info!("Excepted new region distribution: {new_distribution:?}"); for (migration, procedure_id) in migrations.clone().into_iter().zip(procedure_ids) { - info!("Waits for migration: {migration:?}"); - let region_id = migration.region_id.as_u64(); - wait_condition_fn( - Duration::from_secs(120), - || { - let greptime = ctx.greptime.clone(); - let procedure_id = procedure_id.to_string(); - Box::pin(async move { - { - let output = procedure_state(&greptime, &procedure_id).await; - info!("Checking procedure: {procedure_id}, output: {output}"); - fetch_partition(&greptime, region_id).await.unwrap() - } - }) - }, - |partition| { - info!("Region: {region_id}, datanode: {}", partition.datanode_id); - partition.datanode_id == migration.to_peer - }, - Duration::from_secs(1), - ) - .await; + wait_for_migration(&ctx, &migration, &procedure_id).await; } // Validates value rows @@ -388,29 +390,8 @@ async fn execute_migration(ctx: FuzzContext, input: FuzzInput) -> Result<()> { procedure_ids.push(procedure_id); } info!("Excepted new region distribution: {new_distribution:?}"); - for (migration, procedure_id) in migrations.into_iter().zip(procedure_ids) { - info!("Waits for migration: {migration:?}"); - let region_id = migration.region_id.as_u64(); - wait_condition_fn( - Duration::from_secs(120), - || { - let greptime = ctx.greptime.clone(); - let procedure_id = procedure_id.to_string(); - Box::pin(async move { - { - let output = procedure_state(&greptime, &procedure_id).await; - info!("Checking procedure: {procedure_id}, output: {output}"); - fetch_partition(&greptime, region_id).await.unwrap() - } - }) - }, - |partition| { - info!("Region: {region_id}, datanode: {}", partition.datanode_id); - partition.datanode_id == migration.to_peer - }, - Duration::from_secs(1), - ) - .await; + for 
(migration, procedure_id) in migrations.clone().into_iter().zip(procedure_ids) { + wait_for_migration(&ctx, &migration, &procedure_id).await; } // Creates more logical tables and inserts values diff --git a/tests-fuzz/targets/migration/fuzz_migrate_mito_regions.rs b/tests-fuzz/targets/migration/fuzz_migrate_mito_regions.rs index 3f15e859c444..12c4cdae49e1 100644 --- a/tests-fuzz/targets/migration/fuzz_migrate_mito_regions.rs +++ b/tests-fuzz/targets/migration/fuzz_migrate_mito_regions.rs @@ -248,13 +248,13 @@ async fn migrate_regions(ctx: &FuzzContext, migrations: &[Migration]) -> Result< { let output = procedure_state(&greptime, &procedure_id).await; info!("Checking procedure: {procedure_id}, output: {output}"); - fetch_partition(&greptime, region_id).await.unwrap() + (fetch_partition(&greptime, region_id).await.unwrap(), output) } }) }, - |partition| { + |(partition, output)| { info!("Region: {region_id}, datanode: {}", partition.datanode_id); - partition.datanode_id == migration.to_peer + partition.datanode_id == migration.to_peer && output.contains("Done") }, Duration::from_secs(5), ) From 422d18da8bbdaba3b3a9b93bea6ef9bc3b76ab2f Mon Sep 17 00:00:00 2001 From: Ruihang Xia Date: Thu, 19 Dec 2024 11:42:05 +0800 Subject: [PATCH 52/59] feat: bump opendal and switch prometheus layer to the upstream impl (#5179) * feat: bump opendal and switch prometheus layer to the upstream impl Signed-off-by: Ruihang Xia * remove unused files Signed-off-by: Ruihang Xia * fix tests Signed-off-by: Ruihang Xia * remove unused things Signed-off-by: Ruihang Xia * remove root dir on recovering cache Signed-off-by: Ruihang Xia * filter out non-files entry in test Signed-off-by: Ruihang Xia --------- Signed-off-by: Ruihang Xia --- Cargo.lock | 25 +- src/common/datasource/src/object_store/fs.rs | 2 +- src/common/datasource/src/object_store/s3.rs | 2 +- src/common/procedure/src/local/runner.rs | 8 +- src/datanode/src/error.rs | 15 +- src/datanode/src/store.rs | 5 +- src/file-engine/src/manifest.rs | 2 +- src/file-engine/src/region.rs | 6 +- src/metric-engine/src/test_util.rs | 4 +- src/mito2/src/cache/file_cache.rs | 4 +- src/mito2/src/engine/create_test.rs | 4 +- src/mito2/src/engine/drop_test.rs | 12 +- src/mito2/src/engine/open_test.rs | 4 +- src/mito2/src/manifest/tests/checkpoint.rs | 2 + src/mito2/src/sst/file_purger.rs | 6 +- src/mito2/src/worker/handle_open.rs | 2 +- src/object-store/Cargo.toml | 3 +- src/object-store/src/layers.rs | 33 +- .../src/layers/lru_cache/read_cache.rs | 9 +- src/object-store/src/layers/prometheus.rs | 584 ------------------ src/object-store/src/util.rs | 49 +- src/object-store/tests/object_store_test.rs | 61 +- 22 files changed, 134 insertions(+), 708 deletions(-) delete mode 100644 src/object-store/src/layers/prometheus.rs diff --git a/Cargo.lock b/Cargo.lock index a0225cf27dbe..fa8ba34d1a3b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -866,18 +866,6 @@ dependencies = [ "rand", ] -[[package]] -name = "backon" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d67782c3f868daa71d3533538e98a8e13713231969def7536e8039606fc46bf0" -dependencies = [ - "fastrand", - "futures-core", - "pin-project", - "tokio", -] - [[package]] name = "backon" version = "1.2.0" @@ -2228,7 +2216,7 @@ version = "0.12.0" dependencies = [ "async-stream", "async-trait", - "backon 1.2.0", + "backon", "common-base", "common-error", "common-macro", @@ -7386,13 +7374,13 @@ checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" [[package]] name = 
"opendal" -version = "0.49.2" +version = "0.50.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b04d09b9822c2f75a1d2fc513a2c1279c70e91e7407936fffdf6a6976ec530a" +checksum = "cb28bb6c64e116ceaf8dd4e87099d3cfea4a58e85e62b104fef74c91afba0f44" dependencies = [ "anyhow", "async-trait", - "backon 0.4.4", + "backon", "base64 0.22.1", "bytes", "chrono", @@ -7405,6 +7393,7 @@ dependencies = [ "md-5", "once_cell", "percent-encoding", + "prometheus", "quick-xml 0.36.2", "reqsign", "reqwest", @@ -9387,9 +9376,9 @@ dependencies = [ [[package]] name = "reqsign" -version = "0.16.0" +version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03dd4ba7c3901dd43e6b8c7446a760d45bc1ea4301002e1a6fa48f97c3a796fa" +checksum = "eb0075a66c8bfbf4cc8b70dca166e722e1f55a3ea9250ecbb85f4d92a5f64149" dependencies = [ "anyhow", "async-trait", diff --git a/src/common/datasource/src/object_store/fs.rs b/src/common/datasource/src/object_store/fs.rs index f87311f517b7..5ffbbfa3148a 100644 --- a/src/common/datasource/src/object_store/fs.rs +++ b/src/common/datasource/src/object_store/fs.rs @@ -27,7 +27,7 @@ pub fn build_fs_backend(root: &str) -> Result { DefaultLoggingInterceptor, )) .layer(object_store::layers::TracingLayer) - .layer(object_store::layers::PrometheusMetricsLayer::new(true)) + .layer(object_store::layers::build_prometheus_metrics_layer(true)) .finish(); Ok(object_store) } diff --git a/src/common/datasource/src/object_store/s3.rs b/src/common/datasource/src/object_store/s3.rs index e141621b899b..0d83eb7a98b8 100644 --- a/src/common/datasource/src/object_store/s3.rs +++ b/src/common/datasource/src/object_store/s3.rs @@ -89,7 +89,7 @@ pub fn build_s3_backend( DefaultLoggingInterceptor, )) .layer(object_store::layers::TracingLayer) - .layer(object_store::layers::PrometheusMetricsLayer::new(true)) + .layer(object_store::layers::build_prometheus_metrics_layer(true)) .finish()) } diff --git a/src/common/procedure/src/local/runner.rs b/src/common/procedure/src/local/runner.rs index c2d15001fba3..bf277a0e72e5 100644 --- a/src/common/procedure/src/local/runner.rs +++ b/src/common/procedure/src/local/runner.rs @@ -544,7 +544,7 @@ mod tests { use common_test_util::temp_dir::create_temp_dir; use futures_util::future::BoxFuture; use futures_util::FutureExt; - use object_store::ObjectStore; + use object_store::{EntryMode, ObjectStore}; use tokio::sync::mpsc; use super::*; @@ -578,7 +578,11 @@ mod tests { ) { let dir = proc_path!(procedure_store, "{procedure_id}/"); let lister = object_store.list(&dir).await.unwrap(); - let mut files_in_dir: Vec<_> = lister.into_iter().map(|de| de.name().to_string()).collect(); + let mut files_in_dir: Vec<_> = lister + .into_iter() + .filter(|x| x.metadata().mode() == EntryMode::FILE) + .map(|de| de.name().to_string()) + .collect(); files_in_dir.sort_unstable(); assert_eq!(files, files_in_dir); } diff --git a/src/datanode/src/error.rs b/src/datanode/src/error.rs index 9fbd46e16009..61a4eae12883 100644 --- a/src/datanode/src/error.rs +++ b/src/datanode/src/error.rs @@ -193,6 +193,14 @@ pub enum Error { location: Location, }, + #[snafu(display("Failed to build http client"))] + BuildHttpClient { + #[snafu(implicit)] + location: Location, + #[snafu(source)] + error: reqwest::Error, + }, + #[snafu(display("Missing required field: {}", name))] MissingRequiredField { name: String, @@ -406,9 +414,10 @@ impl ErrorExt for Error { | MissingKvBackend { .. } | TomlFormat { .. } => StatusCode::InvalidArguments, - PayloadNotExist { .. 
} | Unexpected { .. } | WatchAsyncTaskChange { .. } => { - StatusCode::Unexpected - } + PayloadNotExist { .. } + | Unexpected { .. } + | WatchAsyncTaskChange { .. } + | BuildHttpClient { .. } => StatusCode::Unexpected, AsyncTaskExecute { source, .. } => source.status_code(), diff --git a/src/datanode/src/store.rs b/src/datanode/src/store.rs index c78afe448e0c..52a1cba982e1 100644 --- a/src/datanode/src/store.rs +++ b/src/datanode/src/store.rs @@ -32,7 +32,7 @@ use object_store::{Access, Error, HttpClient, ObjectStore, ObjectStoreBuilder, O use snafu::prelude::*; use crate::config::{HttpClientConfig, ObjectStoreConfig, DEFAULT_OBJECT_STORE_CACHE_SIZE}; -use crate::error::{self, CreateDirSnafu, Result}; +use crate::error::{self, BuildHttpClientSnafu, CreateDirSnafu, Result}; pub(crate) async fn new_raw_object_store( store: &ObjectStoreConfig, @@ -236,7 +236,8 @@ pub(crate) fn build_http_client(config: &HttpClientConfig) -> Result builder.timeout(config.timeout) }; - HttpClient::build(http_builder).context(error::InitBackendSnafu) + let client = http_builder.build().context(BuildHttpClientSnafu)?; + Ok(HttpClient::with(client)) } struct PrintDetailedError; diff --git a/src/file-engine/src/manifest.rs b/src/file-engine/src/manifest.rs index 6310c3ccb912..6bf5ee104ba2 100644 --- a/src/file-engine/src/manifest.rs +++ b/src/file-engine/src/manifest.rs @@ -46,7 +46,7 @@ impl FileRegionManifest { pub async fn store(&self, region_dir: &str, object_store: &ObjectStore) -> Result<()> { let path = ®ion_manifest_path(region_dir); let exist = object_store - .is_exist(path) + .exists(path) .await .context(CheckObjectSnafu { path })?; ensure!(!exist, ManifestExistsSnafu { path }); diff --git a/src/file-engine/src/region.rs b/src/file-engine/src/region.rs index a5af6822285e..673d352b1e63 100644 --- a/src/file-engine/src/region.rs +++ b/src/file-engine/src/region.rs @@ -130,7 +130,7 @@ mod tests { assert_eq!(region.metadata.primary_key, vec![1]); assert!(object_store - .is_exist("create_region_dir/manifest/_file_manifest") + .exists("create_region_dir/manifest/_file_manifest") .await .unwrap()); @@ -198,13 +198,13 @@ mod tests { .unwrap(); assert!(object_store - .is_exist("drop_region_dir/manifest/_file_manifest") + .exists("drop_region_dir/manifest/_file_manifest") .await .unwrap()); FileRegion::drop(®ion, &object_store).await.unwrap(); assert!(!object_store - .is_exist("drop_region_dir/manifest/_file_manifest") + .exists("drop_region_dir/manifest/_file_manifest") .await .unwrap()); diff --git a/src/metric-engine/src/test_util.rs b/src/metric-engine/src/test_util.rs index c5f7a2b4a32c..d0f8cf5028e6 100644 --- a/src/metric-engine/src/test_util.rs +++ b/src/metric-engine/src/test_util.rs @@ -313,12 +313,12 @@ mod test { let region_dir = "test_metric_region"; // assert metadata region's dir let metadata_region_dir = join_dir(region_dir, METADATA_REGION_SUBDIR); - let exist = object_store.is_exist(&metadata_region_dir).await.unwrap(); + let exist = object_store.exists(&metadata_region_dir).await.unwrap(); assert!(exist); // assert data region's dir let data_region_dir = join_dir(region_dir, DATA_REGION_SUBDIR); - let exist = object_store.is_exist(&data_region_dir).await.unwrap(); + let exist = object_store.exists(&data_region_dir).await.unwrap(); assert!(exist); // check mito engine diff --git a/src/mito2/src/cache/file_cache.rs b/src/mito2/src/cache/file_cache.rs index 9e5742ca0410..eb112530cad7 100644 --- a/src/mito2/src/cache/file_cache.rs +++ b/src/mito2/src/cache/file_cache.rs @@ -286,7 +286,7 @@ impl 
FileCache { } async fn get_reader(&self, file_path: &str) -> object_store::Result> { - if self.local_store.is_exist(file_path).await? { + if self.local_store.exists(file_path).await? { Ok(Some(self.local_store.reader(file_path).await?)) } else { Ok(None) @@ -480,7 +480,7 @@ mod tests { cache.memory_index.run_pending_tasks().await; // The file also not exists. - assert!(!local_store.is_exist(&file_path).await.unwrap()); + assert!(!local_store.exists(&file_path).await.unwrap()); assert_eq!(0, cache.memory_index.weighted_size()); } diff --git a/src/mito2/src/engine/create_test.rs b/src/mito2/src/engine/create_test.rs index 48b04dc86d91..4bcc55934034 100644 --- a/src/mito2/src/engine/create_test.rs +++ b/src/mito2/src/engine/create_test.rs @@ -192,12 +192,12 @@ async fn test_engine_create_with_custom_store() { assert!(object_store_manager .find("Gcs") .unwrap() - .is_exist(region_dir) + .exists(region_dir) .await .unwrap()); assert!(!object_store_manager .default_object_store() - .is_exist(region_dir) + .exists(region_dir) .await .unwrap()); } diff --git a/src/mito2/src/engine/drop_test.rs b/src/mito2/src/engine/drop_test.rs index 7d719f778be9..5d0c5afbf06e 100644 --- a/src/mito2/src/engine/drop_test.rs +++ b/src/mito2/src/engine/drop_test.rs @@ -71,7 +71,7 @@ async fn test_engine_drop_region() { assert!(!env .get_object_store() .unwrap() - .is_exist(&join_path(®ion_dir, DROPPING_MARKER_FILE)) + .exists(&join_path(®ion_dir, DROPPING_MARKER_FILE)) .await .unwrap()); @@ -93,7 +93,7 @@ async fn test_engine_drop_region() { listener.wait().await; let object_store = env.get_object_store().unwrap(); - assert!(!object_store.is_exist(®ion_dir).await.unwrap()); + assert!(!object_store.exists(®ion_dir).await.unwrap()); } #[tokio::test] @@ -167,13 +167,13 @@ async fn test_engine_drop_region_for_custom_store() { assert!(object_store_manager .find("Gcs") .unwrap() - .is_exist(&custom_region_dir) + .exists(&custom_region_dir) .await .unwrap()); assert!(object_store_manager .find("default") .unwrap() - .is_exist(&global_region_dir) + .exists(&global_region_dir) .await .unwrap()); @@ -190,13 +190,13 @@ async fn test_engine_drop_region_for_custom_store() { assert!(!object_store_manager .find("Gcs") .unwrap() - .is_exist(&custom_region_dir) + .exists(&custom_region_dir) .await .unwrap()); assert!(object_store_manager .find("default") .unwrap() - .is_exist(&global_region_dir) + .exists(&global_region_dir) .await .unwrap()); } diff --git a/src/mito2/src/engine/open_test.rs b/src/mito2/src/engine/open_test.rs index 6752bbd04b12..a3b51514c287 100644 --- a/src/mito2/src/engine/open_test.rs +++ b/src/mito2/src/engine/open_test.rs @@ -228,13 +228,13 @@ async fn test_engine_region_open_with_custom_store() { let object_store_manager = env.get_object_store_manager().unwrap(); assert!(!object_store_manager .default_object_store() - .is_exist(region.access_layer.region_dir()) + .exists(region.access_layer.region_dir()) .await .unwrap()); assert!(object_store_manager .find("Gcs") .unwrap() - .is_exist(region.access_layer.region_dir()) + .exists(region.access_layer.region_dir()) .await .unwrap()); } diff --git a/src/mito2/src/manifest/tests/checkpoint.rs b/src/mito2/src/manifest/tests/checkpoint.rs index 692f40422b17..6f2c92bc5e09 100644 --- a/src/mito2/src/manifest/tests/checkpoint.rs +++ b/src/mito2/src/manifest/tests/checkpoint.rs @@ -84,6 +84,7 @@ async fn manager_without_checkpoint() { // check files let mut expected = vec![ + "/", "00000000000000000010.json", "00000000000000000009.json", "00000000000000000008.json", @@ 
-130,6 +131,7 @@ async fn manager_with_checkpoint_distance_1() { // check files let mut expected = vec![ + "/", "00000000000000000009.checkpoint", "00000000000000000010.checkpoint", "00000000000000000010.json", diff --git a/src/mito2/src/sst/file_purger.rs b/src/mito2/src/sst/file_purger.rs index 76c7a7150328..81251c91a564 100644 --- a/src/mito2/src/sst/file_purger.rs +++ b/src/mito2/src/sst/file_purger.rs @@ -185,7 +185,7 @@ mod tests { scheduler.stop(true).await.unwrap(); - assert!(!object_store.is_exist(&path).await.unwrap()); + assert!(!object_store.exists(&path).await.unwrap()); } #[tokio::test] @@ -247,7 +247,7 @@ mod tests { scheduler.stop(true).await.unwrap(); - assert!(!object_store.is_exist(&path).await.unwrap()); - assert!(!object_store.is_exist(&index_path).await.unwrap()); + assert!(!object_store.exists(&path).await.unwrap()); + assert!(!object_store.exists(&index_path).await.unwrap()); } } diff --git a/src/mito2/src/worker/handle_open.rs b/src/mito2/src/worker/handle_open.rs index d4a13a134597..01eaf1765224 100644 --- a/src/mito2/src/worker/handle_open.rs +++ b/src/mito2/src/worker/handle_open.rs @@ -51,7 +51,7 @@ impl RegionWorkerLoop { // Check if this region is pending drop. And clean the entire dir if so. if !self.dropping_regions.is_region_exists(region_id) && object_store - .is_exist(&join_path(&request.region_dir, DROPPING_MARKER_FILE)) + .exists(&join_path(&request.region_dir, DROPPING_MARKER_FILE)) .await .context(OpenDalSnafu)? { diff --git a/src/object-store/Cargo.toml b/src/object-store/Cargo.toml index 72e0e2bfbe46..b82be7376a72 100644 --- a/src/object-store/Cargo.toml +++ b/src/object-store/Cargo.toml @@ -17,8 +17,9 @@ futures.workspace = true lazy_static.workspace = true md5 = "0.7" moka = { workspace = true, features = ["future"] } -opendal = { version = "0.49", features = [ +opendal = { version = "0.50", features = [ "layers-tracing", + "layers-prometheus", "services-azblob", "services-fs", "services-gcs", diff --git a/src/object-store/src/layers.rs b/src/object-store/src/layers.rs index b2145aa6b0e5..20108ab63c52 100644 --- a/src/object-store/src/layers.rs +++ b/src/object-store/src/layers.rs @@ -13,8 +13,37 @@ // limitations under the License. mod lru_cache; -mod prometheus; pub use lru_cache::*; pub use opendal::layers::*; -pub use prometheus::PrometheusMetricsLayer; +pub use prometheus::build_prometheus_metrics_layer; + +mod prometheus { + use std::sync::{Mutex, OnceLock}; + + use opendal::layers::PrometheusLayer; + + static PROMETHEUS_LAYER: OnceLock> = OnceLock::new(); + + pub fn build_prometheus_metrics_layer(with_path_label: bool) -> PrometheusLayer { + PROMETHEUS_LAYER + .get_or_init(|| { + // This logic tries to extract the parent path from the object storage operation. + // The function also relies on the assumption that the region path is built from + // pattern `/catalog/schema/table_id/....` + // + // We'll get the data/catalog/schema from path.
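+ // e.g. for a region path like "data/greptime/public/1024/1024_0000000000/",
+ // the resulting label is expected to be "data/greptime/public"
+ // (this matches the removed `extract_parent_path` tests further below).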
+ let path_level = if with_path_label { 3 } else { 0 }; + + let layer = PrometheusLayer::builder() + .path_label(path_level) + .register_default() + .unwrap(); + + Mutex::new(layer) + }) + .lock() + .unwrap() + .clone() + } +} diff --git a/src/object-store/src/layers/lru_cache/read_cache.rs b/src/object-store/src/layers/lru_cache/read_cache.rs index f88b36784d15..874b17280d9c 100644 --- a/src/object-store/src/layers/lru_cache/read_cache.rs +++ b/src/object-store/src/layers/lru_cache/read_cache.rs @@ -156,9 +156,12 @@ impl ReadCache { let size = entry.metadata().content_length(); OBJECT_STORE_LRU_CACHE_ENTRIES.inc(); OBJECT_STORE_LRU_CACHE_BYTES.add(size as i64); - self.mem_cache - .insert(read_key.to_string(), ReadResult::Success(size as u32)) - .await; + // ignore root path + if entry.path() != "/" { + self.mem_cache + .insert(read_key.to_string(), ReadResult::Success(size as u32)) + .await; + } } Ok(self.cache_stat().await) diff --git a/src/object-store/src/layers/prometheus.rs b/src/object-store/src/layers/prometheus.rs deleted file mode 100644 index fef83a91468a..000000000000 --- a/src/object-store/src/layers/prometheus.rs +++ /dev/null @@ -1,584 +0,0 @@ -// Copyright 2023 Greptime Team -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -//! code originally from , make a tiny change to avoid crash in multi thread env - -use std::fmt::{Debug, Formatter}; - -use common_telemetry::debug; -use lazy_static::lazy_static; -use opendal::raw::*; -use opendal::{Buffer, ErrorKind}; -use prometheus::{ - exponential_buckets, histogram_opts, register_histogram_vec, register_int_counter_vec, - Histogram, HistogramTimer, HistogramVec, IntCounterVec, -}; - -use crate::util::extract_parent_path; - -type Result = std::result::Result; - -lazy_static! { - static ref REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!( - "opendal_requests_total", - "Total times of all kinds of operation being called", - &["scheme", "operation", "path"], - ) - .unwrap(); - static ref REQUESTS_DURATION_SECONDS: HistogramVec = register_histogram_vec!( - histogram_opts!( - "opendal_requests_duration_seconds", - "Histogram of the time spent on specific operation", - exponential_buckets(0.01, 2.0, 16).unwrap() - ), - &["scheme", "operation", "path"] - ) - .unwrap(); - static ref BYTES_TOTAL: HistogramVec = register_histogram_vec!( - histogram_opts!( - "opendal_bytes_total", - "Total size of sync or async Read/Write", - exponential_buckets(0.01, 2.0, 16).unwrap() - ), - &["scheme", "operation", "path"] - ) - .unwrap(); -} - -#[inline] -fn increment_errors_total(op: Operation, kind: ErrorKind) { - debug!( - "Prometheus statistics metrics error, operation {} error {}", - op.into_static(), - kind.into_static() - ); -} - -/// Please refer to [prometheus](https://docs.rs/prometheus) for every operation. -/// -/// # Prometheus Metrics -/// -/// In this section, we will introduce three metrics that are currently being exported by opendal. These metrics are essential for understanding the behavior and performance of opendal. 
-/// -/// -/// | Metric Name | Type | Description | Labels | -/// |-----------------------------------|-----------|------------------------------------------------------|---------------------| -/// | opendal_requests_total | Counter | Total times of all kinds of operation being called | scheme, operation | -/// | opendal_requests_duration_seconds | Histogram | Histogram of the time spent on specific operation | scheme, operation | -/// | opendal_bytes_total | Histogram | Total size of sync or async Read/Write | scheme, operation | -/// -/// For a more detailed explanation of these metrics and how they are used, please refer to the [Prometheus documentation](https://prometheus.io/docs/introduction/overview/). -/// -/// # Histogram Configuration -/// -/// The metric buckets for these histograms are automatically generated based on the `exponential_buckets(0.01, 2.0, 16)` configuration. -#[derive(Default, Debug, Clone)] -pub struct PrometheusMetricsLayer { - pub path_label: bool, -} - -impl PrometheusMetricsLayer { - pub fn new(path_label: bool) -> Self { - Self { path_label } - } -} - -impl Layer for PrometheusMetricsLayer { - type LayeredAccess = PrometheusAccess; - - fn layer(&self, inner: A) -> Self::LayeredAccess { - let meta = inner.info(); - let scheme = meta.scheme(); - - PrometheusAccess { - inner, - scheme: scheme.to_string(), - path_label: self.path_label, - } - } -} - -#[derive(Clone)] -pub struct PrometheusAccess { - inner: A, - scheme: String, - path_label: bool, -} - -impl PrometheusAccess { - fn get_path_label<'a>(&self, path: &'a str) -> &'a str { - if self.path_label { - extract_parent_path(path) - } else { - "" - } - } -} - -impl Debug for PrometheusAccess { - fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { - f.debug_struct("PrometheusAccessor") - .field("inner", &self.inner) - .finish_non_exhaustive() - } -} - -impl LayeredAccess for PrometheusAccess { - type Inner = A; - type Reader = PrometheusMetricWrapper; - type BlockingReader = PrometheusMetricWrapper; - type Writer = PrometheusMetricWrapper; - type BlockingWriter = PrometheusMetricWrapper; - type Lister = A::Lister; - type BlockingLister = A::BlockingLister; - - fn inner(&self) -> &Self::Inner { - &self.inner - } - - async fn create_dir(&self, path: &str, args: OpCreateDir) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::CreateDir.into_static(), path_label]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::CreateDir.into_static(), path_label]) - .start_timer(); - let create_res = self.inner.create_dir(path, args).await; - - timer.observe_duration(); - create_res.inspect_err(|e| { - increment_errors_total(Operation::CreateDir, e.kind()); - }) - } - - async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::Read.into_static(), path_label]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::Read.into_static(), path_label]) - .start_timer(); - - let (rp, r) = self.inner.read(path, args).await.inspect_err(|e| { - increment_errors_total(Operation::Read, e.kind()); - })?; - - Ok(( - rp, - PrometheusMetricWrapper::new( - r, - Operation::Read, - BYTES_TOTAL.with_label_values(&[ - &self.scheme, - Operation::Read.into_static(), - path_label, - ]), - timer, - ), - )) - } - - async fn write(&self, path: &str, 
args: OpWrite) -> Result<(RpWrite, Self::Writer)> { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::Write.into_static(), path_label]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::Write.into_static(), path_label]) - .start_timer(); - - let (rp, r) = self.inner.write(path, args).await.inspect_err(|e| { - increment_errors_total(Operation::Write, e.kind()); - })?; - - Ok(( - rp, - PrometheusMetricWrapper::new( - r, - Operation::Write, - BYTES_TOTAL.with_label_values(&[ - &self.scheme, - Operation::Write.into_static(), - path_label, - ]), - timer, - ), - )) - } - - async fn stat(&self, path: &str, args: OpStat) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::Stat.into_static(), path_label]) - .inc(); - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::Stat.into_static(), path_label]) - .start_timer(); - - let stat_res = self.inner.stat(path, args).await; - timer.observe_duration(); - stat_res.inspect_err(|e| { - increment_errors_total(Operation::Stat, e.kind()); - }) - } - - async fn delete(&self, path: &str, args: OpDelete) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::Delete.into_static(), path_label]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::Delete.into_static(), path_label]) - .start_timer(); - - let delete_res = self.inner.delete(path, args).await; - timer.observe_duration(); - delete_res.inspect_err(|e| { - increment_errors_total(Operation::Delete, e.kind()); - }) - } - - async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::List.into_static(), path_label]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::List.into_static(), path_label]) - .start_timer(); - - let list_res = self.inner.list(path, args).await; - - timer.observe_duration(); - list_res.inspect_err(|e| { - increment_errors_total(Operation::List, e.kind()); - }) - } - - async fn batch(&self, args: OpBatch) -> Result { - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::Batch.into_static(), ""]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::Batch.into_static(), ""]) - .start_timer(); - let result = self.inner.batch(args).await; - - timer.observe_duration(); - result.inspect_err(|e| { - increment_errors_total(Operation::Batch, e.kind()); - }) - } - - async fn presign(&self, path: &str, args: OpPresign) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[&self.scheme, Operation::Presign.into_static(), path_label]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[&self.scheme, Operation::Presign.into_static(), path_label]) - .start_timer(); - let result = self.inner.presign(path, args).await; - timer.observe_duration(); - - result.inspect_err(|e| { - increment_errors_total(Operation::Presign, e.kind()); - }) - } - - fn blocking_create_dir(&self, path: &str, args: OpCreateDir) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[ - &self.scheme, - Operation::BlockingCreateDir.into_static(), - path_label, - 
]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[ - &self.scheme, - Operation::BlockingCreateDir.into_static(), - path_label, - ]) - .start_timer(); - let result = self.inner.blocking_create_dir(path, args); - - timer.observe_duration(); - - result.inspect_err(|e| { - increment_errors_total(Operation::BlockingCreateDir, e.kind()); - }) - } - - fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[ - &self.scheme, - Operation::BlockingRead.into_static(), - path_label, - ]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[ - &self.scheme, - Operation::BlockingRead.into_static(), - path_label, - ]) - .start_timer(); - - self.inner - .blocking_read(path, args) - .map(|(rp, r)| { - ( - rp, - PrometheusMetricWrapper::new( - r, - Operation::BlockingRead, - BYTES_TOTAL.with_label_values(&[ - &self.scheme, - Operation::BlockingRead.into_static(), - path_label, - ]), - timer, - ), - ) - }) - .inspect_err(|e| { - increment_errors_total(Operation::BlockingRead, e.kind()); - }) - } - - fn blocking_write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::BlockingWriter)> { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[ - &self.scheme, - Operation::BlockingWrite.into_static(), - path_label, - ]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[ - &self.scheme, - Operation::BlockingWrite.into_static(), - path_label, - ]) - .start_timer(); - - self.inner - .blocking_write(path, args) - .map(|(rp, r)| { - ( - rp, - PrometheusMetricWrapper::new( - r, - Operation::BlockingWrite, - BYTES_TOTAL.with_label_values(&[ - &self.scheme, - Operation::BlockingWrite.into_static(), - path_label, - ]), - timer, - ), - ) - }) - .inspect_err(|e| { - increment_errors_total(Operation::BlockingWrite, e.kind()); - }) - } - - fn blocking_stat(&self, path: &str, args: OpStat) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[ - &self.scheme, - Operation::BlockingStat.into_static(), - path_label, - ]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[ - &self.scheme, - Operation::BlockingStat.into_static(), - path_label, - ]) - .start_timer(); - let result = self.inner.blocking_stat(path, args); - timer.observe_duration(); - result.inspect_err(|e| { - increment_errors_total(Operation::BlockingStat, e.kind()); - }) - } - - fn blocking_delete(&self, path: &str, args: OpDelete) -> Result { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[ - &self.scheme, - Operation::BlockingDelete.into_static(), - path_label, - ]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[ - &self.scheme, - Operation::BlockingDelete.into_static(), - path_label, - ]) - .start_timer(); - let result = self.inner.blocking_delete(path, args); - timer.observe_duration(); - - result.inspect_err(|e| { - increment_errors_total(Operation::BlockingDelete, e.kind()); - }) - } - - fn blocking_list(&self, path: &str, args: OpList) -> Result<(RpList, Self::BlockingLister)> { - let path_label = self.get_path_label(path); - REQUESTS_TOTAL - .with_label_values(&[ - &self.scheme, - Operation::BlockingList.into_static(), - path_label, - ]) - .inc(); - - let timer = REQUESTS_DURATION_SECONDS - .with_label_values(&[ - &self.scheme, - Operation::BlockingList.into_static(), - path_label, - ]) - 
.start_timer(); - let result = self.inner.blocking_list(path, args); - timer.observe_duration(); - - result.inspect_err(|e| { - increment_errors_total(Operation::BlockingList, e.kind()); - }) - } -} - -pub struct PrometheusMetricWrapper { - inner: R, - - op: Operation, - bytes_counter: Histogram, - _requests_duration_timer: HistogramTimer, - bytes: u64, -} - -impl Drop for PrometheusMetricWrapper { - fn drop(&mut self) { - self.bytes_counter.observe(self.bytes as f64); - } -} - -impl PrometheusMetricWrapper { - fn new( - inner: R, - op: Operation, - bytes_counter: Histogram, - requests_duration_timer: HistogramTimer, - ) -> Self { - Self { - inner, - op, - bytes_counter, - _requests_duration_timer: requests_duration_timer, - bytes: 0, - } - } -} - -impl oio::Read for PrometheusMetricWrapper { - async fn read(&mut self) -> Result { - self.inner.read().await.inspect_err(|err| { - increment_errors_total(self.op, err.kind()); - }) - } -} - -impl oio::BlockingRead for PrometheusMetricWrapper { - fn read(&mut self) -> opendal::Result { - self.inner.read().inspect_err(|err| { - increment_errors_total(self.op, err.kind()); - }) - } -} - -impl oio::Write for PrometheusMetricWrapper { - async fn write(&mut self, bs: Buffer) -> Result<()> { - let bytes = bs.len(); - match self.inner.write(bs).await { - Ok(_) => { - self.bytes += bytes as u64; - Ok(()) - } - Err(err) => { - increment_errors_total(self.op, err.kind()); - Err(err) - } - } - } - - async fn close(&mut self) -> Result<()> { - self.inner.close().await.inspect_err(|err| { - increment_errors_total(self.op, err.kind()); - }) - } - - async fn abort(&mut self) -> Result<()> { - self.inner.close().await.inspect_err(|err| { - increment_errors_total(self.op, err.kind()); - }) - } -} - -impl oio::BlockingWrite for PrometheusMetricWrapper { - fn write(&mut self, bs: Buffer) -> Result<()> { - let bytes = bs.len(); - self.inner - .write(bs) - .map(|_| { - self.bytes += bytes as u64; - }) - .inspect_err(|err| { - increment_errors_total(self.op, err.kind()); - }) - } - - fn close(&mut self) -> Result<()> { - self.inner.close().inspect_err(|err| { - increment_errors_total(self.op, err.kind()); - }) - } -} diff --git a/src/object-store/src/util.rs b/src/object-store/src/util.rs index fc0a031ab953..271da33e853c 100644 --- a/src/object-store/src/util.rs +++ b/src/object-store/src/util.rs @@ -15,19 +15,12 @@ use std::fmt::Display; use common_telemetry::{debug, error, trace}; -use futures::TryStreamExt; use opendal::layers::{LoggingInterceptor, LoggingLayer, TracingLayer}; use opendal::raw::{AccessorInfo, Operation}; -use opendal::{Entry, ErrorKind, Lister}; +use opendal::ErrorKind; -use crate::layers::PrometheusMetricsLayer; use crate::ObjectStore; -/// Collect all entries from the [Lister]. -pub async fn collect(stream: Lister) -> Result, opendal::Error> { - stream.try_collect::>().await -} - /// Join two paths and normalize the output dir. /// /// The output dir is always ends with `/`. e.g. 
@@ -127,26 +120,12 @@ pub fn normalize_path(path: &str) -> String { p } -// This logical tries to extract parent path from the object storage operation -// the function also relies on assumption that the region path is built from -// pattern `/catalog/schema/table_id/....` -// -// this implementation tries to extract at most 3 levels of parent path -pub(crate) fn extract_parent_path(path: &str) -> &str { - // split the path into `catalog`, `schema` and others - path.char_indices() - .filter(|&(_, c)| c == '/') - // we get the data/catalog/schema from path, split at the 3rd / - .nth(2) - .map_or(path, |(i, _)| &path[..i]) -} - /// Attaches instrument layers to the object store. pub fn with_instrument_layers(object_store: ObjectStore, path_label: bool) -> ObjectStore { object_store .layer(LoggingLayer::new(DefaultLoggingInterceptor)) .layer(TracingLayer) - .layer(PrometheusMetricsLayer::new(path_label)) + .layer(crate::layers::build_prometheus_metrics_layer(path_label)) } static LOGGING_TARGET: &str = "opendal::services"; @@ -263,28 +242,4 @@ mod tests { assert_eq!("/abc", join_path("//", "/abc")); assert_eq!("abc/def", join_path("abc/", "//def")); } - - #[test] - fn test_path_extraction() { - assert_eq!( - "data/greptime/public", - extract_parent_path("data/greptime/public/1024/1024_0000000000/") - ); - - assert_eq!( - "data/greptime/public", - extract_parent_path("data/greptime/public/1/") - ); - - assert_eq!( - "data/greptime/public", - extract_parent_path("data/greptime/public") - ); - - assert_eq!("data/greptime/", extract_parent_path("data/greptime/")); - - assert_eq!("data/", extract_parent_path("data/")); - - assert_eq!("/", extract_parent_path("/")); - } } diff --git a/src/object-store/tests/object_store_test.rs b/src/object-store/tests/object_store_test.rs index 497decffabfc..7e81b965fbed 100644 --- a/src/object-store/tests/object_store_test.rs +++ b/src/object-store/tests/object_store_test.rs @@ -65,23 +65,38 @@ async fn test_object_list(store: &ObjectStore) -> Result<()> { store.write(p3, "Hello, object3!").await?; // List objects - let entries = store.list("/").await?; + let entries = store + .list("/") + .await? + .into_iter() + .filter(|x| x.metadata().mode() == EntryMode::FILE) + .collect::>(); assert_eq!(3, entries.len()); store.delete(p1).await?; store.delete(p3).await?; // List objects again - // Only o2 is exists - let entries = store.list("/").await?; + // Only o2 and root exist + let entries = store + .list("/") + .await? + .into_iter() + .filter(|x| x.metadata().mode() == EntryMode::FILE) + .collect::>(); assert_eq!(1, entries.len()); - assert_eq!(p2, entries.first().unwrap().path()); + assert_eq!(p2, entries[0].path()); let content = store.read(p2).await?; assert_eq!("Hello, object2!", String::from_utf8(content.to_vec())?); store.delete(p2).await?; - let entries = store.list("/").await?; + let entries = store + .list("/") + .await? 
+ .into_iter() + .filter(|x| x.metadata().mode() == EntryMode::FILE) + .collect::>(); assert!(entries.is_empty()); assert!(store.read(p1).await.is_err()); @@ -252,7 +267,7 @@ async fn test_file_backend_with_lru_cache() -> Result<()> { async fn assert_lru_cache(cache_layer: &LruCacheLayer, file_names: &[&str]) { for file_name in file_names { - assert!(cache_layer.contains_file(file_name).await); + assert!(cache_layer.contains_file(file_name).await, "{file_name}"); } } @@ -264,7 +279,9 @@ async fn assert_cache_files( let (_, mut lister) = store.list("/", OpList::default()).await?; let mut objects = vec![]; while let Some(e) = lister.next().await? { - objects.push(e); + if e.mode() == EntryMode::FILE { + objects.push(e); + } } // compare the cache file with the expected cache file; ignore orders @@ -332,9 +349,9 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_cache_files( &cache_store, &[ - "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14", - "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=7-14", - "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=0-14", + "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-", + "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=7-", + "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=0-", ], &["Hello, object1!", "object2!", "Hello, object2!"], ) @@ -342,9 +359,9 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_lru_cache( &cache_layer, &[ - "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14", - "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=7-14", - "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=0-14", + "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-", + "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=7-", + "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=0-", ], ) .await; @@ -355,13 +372,13 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_eq!(cache_layer.read_cache_stat().await, (1, 15)); assert_cache_files( &cache_store, - &["6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14"], + &["6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-"], &["Hello, object1!"], ) .await?; assert_lru_cache( &cache_layer, - &["6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14"], + &["6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-"], ) .await; @@ -388,8 +405,8 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_cache_files( &cache_store, &[ - "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", + "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], &["Hello, object1!", "Hello, object3!", "Hello"], @@ -398,8 +415,8 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_lru_cache( &cache_layer, &[ - "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", + "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], ) @@ -416,7 +433,7 @@ async fn test_object_store_cache_policy() -> Result<()> { &cache_store, &[ "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=1-14", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], &["ello, object1!", "Hello, object3!", "Hello"], @@ -426,7 +443,7 @@ async fn test_object_store_cache_policy() -> Result<()> { &cache_layer, &[ 
"6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=1-14", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], ) @@ -448,7 +465,7 @@ async fn test_object_store_cache_policy() -> Result<()> { &cache_layer, &[ "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=1-14", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], ) From 2d6f63a504b5da77134feed4e843e6131bd2c748 Mon Sep 17 00:00:00 2001 From: discord9 <55937128+discord9@users.noreply.github.com> Date: Thu, 19 Dec 2024 16:24:04 +0800 Subject: [PATCH 53/59] feat: show flow's mem usage in INFORMATION_SCHEMA.FLOWS (#4890) * feat: add flow mem size to sys table * chore: rm dup def * chore: remove unused variant * chore: minor refactor * refactor: per review --- Cargo.lock | 128 ++++++++++++++ src/catalog/src/error.rs | 8 + src/catalog/src/information_extension.rs | 9 + .../src/system_schema/information_schema.rs | 9 + .../system_schema/information_schema/flows.rs | 35 +++- src/cli/src/repl.rs | 2 +- src/cmd/src/standalone.rs | 33 +++- src/common/meta/src/key.rs | 4 +- src/common/meta/src/key/flow.rs | 9 + src/common/meta/src/key/flow/flow_state.rs | 162 ++++++++++++++++++ src/common/meta/src/kv_backend.rs | 7 +- src/common/meta/src/kv_backend/memory.rs | 8 +- src/flow/Cargo.toml | 2 + src/flow/src/adapter.rs | 32 ++++ src/flow/src/adapter/stat.rs | 40 +++++ src/flow/src/adapter/worker.rs | 30 ++++ src/flow/src/compute/state.rs | 5 + src/flow/src/heartbeat.rs | 49 +++++- src/flow/src/repr.rs | 8 + src/flow/src/server.rs | 17 +- src/flow/src/utils.rs | 139 ++++++++++++++- src/meta-client/src/client.rs | 15 +- src/meta-client/src/client/cluster.rs | 76 +++++++- src/meta-client/src/error.rs | 20 ++- src/meta-srv/src/error.rs | 10 +- src/meta-srv/src/handler.rs | 14 ++ .../src/handler/flow_state_handler.rs | 58 +++++++ src/meta-srv/src/metasrv/builder.rs | 7 + src/meta-srv/src/service/store/cached_kv.rs | 4 + .../standalone/common/flow/flow_basic.result | 12 ++ .../standalone/common/flow/flow_basic.sql | 6 + .../common/system/information_schema.result | 17 +- 32 files changed, 942 insertions(+), 33 deletions(-) create mode 100644 src/common/meta/src/key/flow/flow_state.rs create mode 100644 src/flow/src/adapter/stat.rs create mode 100644 src/meta-srv/src/handler/flow_state_handler.rs diff --git a/Cargo.lock b/Cargo.lock index fa8ba34d1a3b..c23acf60636d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -730,6 +730,36 @@ version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +[[package]] +name = "attribute-derive" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f1800e974930e9079c965b9ffbcb6667a40401063a26396c7b4f15edc92da690" +dependencies = [ + "attribute-derive-macro", + "derive-where", + "manyhow", + "proc-macro2", + "quote", + "syn 2.0.90", +] + +[[package]] +name = "attribute-derive-macro" +version = "0.10.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d908eb786ef94296bff86f90130b3b748b49401dc81fd2bb8b3dccd44cfacbd" +dependencies = [ + "collection_literals", + "interpolator", + "manyhow", + "proc-macro-utils", + "proc-macro2", + "quote", + "quote-use", + "syn 2.0.90", +] + [[package]] name = "atty" version = "0.2.14" @@ -1845,6 +1875,12 @@ dependencies = 
[ "tracing-appender", ] +[[package]] +name = "collection_literals" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "186dce98367766de751c42c4f03970fc60fc012296e706ccbb9d5df9b6c1e271" + [[package]] name = "colorchoice" version = "1.0.2" @@ -3346,6 +3382,17 @@ dependencies = [ "syn 2.0.90", ] +[[package]] +name = "derive-where" +version = "1.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62d671cc41a825ebabc75757b62d3d168c577f9149b2d49ece1dad1f72119d25" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "derive_arbitrary" version = "1.3.2" @@ -4011,6 +4058,8 @@ dependencies = [ "enum-as-inner", "enum_dispatch", "futures", + "get-size-derive2", + "get-size2", "greptime-proto", "hydroflow", "itertools 0.10.5", @@ -4415,6 +4464,23 @@ dependencies = [ "libm", ] +[[package]] +name = "get-size-derive2" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd26d3a97ea14d289c8b54180243ecfe465f3fa9c279a6336d7a003698fc39d" +dependencies = [ + "attribute-derive", + "quote", + "syn 2.0.90", +] + +[[package]] +name = "get-size2" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "159c430715e540d2198fa981d39cd45563ccc60900de187f5b152b33b1cb408e" + [[package]] name = "gethostname" version = "0.2.3" @@ -5346,6 +5412,12 @@ version = "4.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0d762194228a2f1c11063e46e32e5acb96e66e906382b9eb5441f2e0504bbd5a" +[[package]] +name = "interpolator" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "71dd52191aae121e8611f1e8dc3e324dd0dd1dee1e6dd91d10ee07a3cfb4d9d8" + [[package]] name = "inventory" version = "0.3.15" @@ -6244,6 +6316,29 @@ dependencies = [ "libc", ] +[[package]] +name = "manyhow" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b33efb3ca6d3b07393750d4030418d594ab1139cee518f0dc88db70fec873587" +dependencies = [ + "manyhow-macros", + "proc-macro2", + "quote", + "syn 2.0.90", +] + +[[package]] +name = "manyhow-macros" +version = "0.11.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46fce34d199b78b6e6073abf984c9cf5fd3e9330145a93ee0738a7443e371495" +dependencies = [ + "proc-macro-utils", + "proc-macro2", + "quote", +] + [[package]] name = "maplit" version = "1.0.2" @@ -8528,6 +8623,17 @@ dependencies = [ "version_check", ] +[[package]] +name = "proc-macro-utils" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "eeaf08a13de400bc215877b5bdc088f241b12eb42f0a548d3390dc1c56bb7071" +dependencies = [ + "proc-macro2", + "quote", + "smallvec", +] + [[package]] name = "proc-macro2" version = "1.0.92" @@ -9107,6 +9213,28 @@ dependencies = [ "proc-macro2", ] +[[package]] +name = "quote-use" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9619db1197b497a36178cfc736dc96b271fe918875fbf1344c436a7e93d0321e" +dependencies = [ + "quote", + "quote-use-macros", +] + +[[package]] +name = "quote-use-macros" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "82ebfb7faafadc06a7ab141a6f67bcfb24cb8beb158c6fe933f2f035afa99f35" +dependencies = [ + "proc-macro-utils", + "proc-macro2", + "quote", + "syn 2.0.90", +] + [[package]] name = "radium" version = "0.7.0" diff --git 
a/src/catalog/src/error.rs b/src/catalog/src/error.rs index c7e6f8b55c01..4980a8178eb6 100644 --- a/src/catalog/src/error.rs +++ b/src/catalog/src/error.rs @@ -64,6 +64,13 @@ pub enum Error { source: BoxedError, }, + #[snafu(display("Failed to list flow stats"))] + ListFlowStats { + #[snafu(implicit)] + location: Location, + source: BoxedError, + }, + #[snafu(display("Failed to list flows in catalog {catalog}"))] ListFlows { #[snafu(implicit)] @@ -326,6 +333,7 @@ impl ErrorExt for Error { | Error::ListSchemas { source, .. } | Error::ListTables { source, .. } | Error::ListFlows { source, .. } + | Error::ListFlowStats { source, .. } | Error::ListProcedures { source, .. } | Error::ListRegionStats { source, .. } | Error::ConvertProtoData { source, .. } => source.status_code(), diff --git a/src/catalog/src/information_extension.rs b/src/catalog/src/information_extension.rs index 55764557a326..4d829ae01ae3 100644 --- a/src/catalog/src/information_extension.rs +++ b/src/catalog/src/information_extension.rs @@ -17,6 +17,7 @@ use common_error::ext::BoxedError; use common_meta::cluster::{ClusterInfo, NodeInfo}; use common_meta::datanode::RegionStat; use common_meta::ddl::{ExecutorContext, ProcedureExecutor}; +use common_meta::key::flow::flow_state::FlowStat; use common_meta::rpc::procedure; use common_procedure::{ProcedureInfo, ProcedureState}; use meta_client::MetaClientRef; @@ -89,4 +90,12 @@ impl InformationExtension for DistributedInformationExtension { .map_err(BoxedError::new) .context(error::ListRegionStatsSnafu) } + + async fn flow_stats(&self) -> std::result::Result, Self::Error> { + self.meta_client + .list_flow_stats() + .await + .map_err(BoxedError::new) + .context(crate::error::ListFlowStatsSnafu) + } } diff --git a/src/catalog/src/system_schema/information_schema.rs b/src/catalog/src/system_schema/information_schema.rs index 4101887cb443..6b3231cc080a 100644 --- a/src/catalog/src/system_schema/information_schema.rs +++ b/src/catalog/src/system_schema/information_schema.rs @@ -35,6 +35,7 @@ use common_catalog::consts::{self, DEFAULT_CATALOG_NAME, INFORMATION_SCHEMA_NAME use common_error::ext::ErrorExt; use common_meta::cluster::NodeInfo; use common_meta::datanode::RegionStat; +use common_meta::key::flow::flow_state::FlowStat; use common_meta::key::flow::FlowMetadataManager; use common_procedure::ProcedureInfo; use common_recordbatch::SendableRecordBatchStream; @@ -192,6 +193,7 @@ impl SystemSchemaProviderInner for InformationSchemaProvider { )) as _), FLOWS => Some(Arc::new(InformationSchemaFlows::new( self.catalog_name.clone(), + self.catalog_manager.clone(), self.flow_metadata_manager.clone(), )) as _), PROCEDURE_INFO => Some( @@ -338,6 +340,9 @@ pub trait InformationExtension { /// Gets the region statistics. async fn region_stats(&self) -> std::result::Result, Self::Error>; + + /// Get the flow statistics. If no flownode is available, return `None`. 
+ async fn flow_stats(&self) -> std::result::Result, Self::Error>; } pub struct NoopInformationExtension; @@ -357,4 +362,8 @@ impl InformationExtension for NoopInformationExtension { async fn region_stats(&self) -> std::result::Result, Self::Error> { Ok(vec![]) } + + async fn flow_stats(&self) -> std::result::Result, Self::Error> { + Ok(None) + } } diff --git a/src/catalog/src/system_schema/information_schema/flows.rs b/src/catalog/src/system_schema/information_schema/flows.rs index 15a4205ae2af..5d35cfbbe431 100644 --- a/src/catalog/src/system_schema/information_schema/flows.rs +++ b/src/catalog/src/system_schema/information_schema/flows.rs @@ -12,11 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::sync::Arc; +use std::sync::{Arc, Weak}; use common_catalog::consts::INFORMATION_SCHEMA_FLOW_TABLE_ID; use common_error::ext::BoxedError; use common_meta::key::flow::flow_info::FlowInfoValue; +use common_meta::key::flow::flow_state::FlowStat; use common_meta::key::flow::FlowMetadataManager; use common_meta::key::FlowId; use common_recordbatch::adapter::RecordBatchStreamAdapter; @@ -28,7 +29,9 @@ use datatypes::prelude::ConcreteDataType as CDT; use datatypes::scalars::ScalarVectorBuilder; use datatypes::schema::{ColumnSchema, Schema, SchemaRef}; use datatypes::value::Value; -use datatypes::vectors::{Int64VectorBuilder, StringVectorBuilder, UInt32VectorBuilder, VectorRef}; +use datatypes::vectors::{ + Int64VectorBuilder, StringVectorBuilder, UInt32VectorBuilder, UInt64VectorBuilder, VectorRef, +}; use futures::TryStreamExt; use snafu::{OptionExt, ResultExt}; use store_api::storage::{ScanRequest, TableId}; @@ -38,6 +41,8 @@ use crate::error::{ }; use crate::information_schema::{Predicates, FLOWS}; use crate::system_schema::information_schema::InformationTable; +use crate::system_schema::utils; +use crate::CatalogManager; const INIT_CAPACITY: usize = 42; @@ -45,6 +50,7 @@ const INIT_CAPACITY: usize = 42; // pk is (flow_name, flow_id, table_catalog) pub const FLOW_NAME: &str = "flow_name"; pub const FLOW_ID: &str = "flow_id"; +pub const STATE_SIZE: &str = "state_size"; pub const TABLE_CATALOG: &str = "table_catalog"; pub const FLOW_DEFINITION: &str = "flow_definition"; pub const COMMENT: &str = "comment"; @@ -55,20 +61,24 @@ pub const FLOWNODE_IDS: &str = "flownode_ids"; pub const OPTIONS: &str = "options"; /// The `information_schema.flows` to provides information about flows in databases. 
+/// pub(super) struct InformationSchemaFlows { schema: SchemaRef, catalog_name: String, + catalog_manager: Weak, flow_metadata_manager: Arc, } impl InformationSchemaFlows { pub(super) fn new( catalog_name: String, + catalog_manager: Weak, flow_metadata_manager: Arc, ) -> Self { Self { schema: Self::schema(), catalog_name, + catalog_manager, flow_metadata_manager, } } @@ -80,6 +90,7 @@ impl InformationSchemaFlows { vec![ (FLOW_NAME, CDT::string_datatype(), false), (FLOW_ID, CDT::uint32_datatype(), false), + (STATE_SIZE, CDT::uint64_datatype(), true), (TABLE_CATALOG, CDT::string_datatype(), false), (FLOW_DEFINITION, CDT::string_datatype(), false), (COMMENT, CDT::string_datatype(), true), @@ -99,6 +110,7 @@ impl InformationSchemaFlows { InformationSchemaFlowsBuilder::new( self.schema.clone(), self.catalog_name.clone(), + self.catalog_manager.clone(), &self.flow_metadata_manager, ) } @@ -144,10 +156,12 @@ impl InformationTable for InformationSchemaFlows { struct InformationSchemaFlowsBuilder { schema: SchemaRef, catalog_name: String, + catalog_manager: Weak, flow_metadata_manager: Arc, flow_names: StringVectorBuilder, flow_ids: UInt32VectorBuilder, + state_sizes: UInt64VectorBuilder, table_catalogs: StringVectorBuilder, raw_sqls: StringVectorBuilder, comments: StringVectorBuilder, @@ -162,15 +176,18 @@ impl InformationSchemaFlowsBuilder { fn new( schema: SchemaRef, catalog_name: String, + catalog_manager: Weak, flow_metadata_manager: &Arc, ) -> Self { Self { schema, catalog_name, + catalog_manager, flow_metadata_manager: flow_metadata_manager.clone(), flow_names: StringVectorBuilder::with_capacity(INIT_CAPACITY), flow_ids: UInt32VectorBuilder::with_capacity(INIT_CAPACITY), + state_sizes: UInt64VectorBuilder::with_capacity(INIT_CAPACITY), table_catalogs: StringVectorBuilder::with_capacity(INIT_CAPACITY), raw_sqls: StringVectorBuilder::with_capacity(INIT_CAPACITY), comments: StringVectorBuilder::with_capacity(INIT_CAPACITY), @@ -195,6 +212,11 @@ impl InformationSchemaFlowsBuilder { .flow_names(&catalog_name) .await; + let flow_stat = { + let information_extension = utils::information_extension(&self.catalog_manager)?; + information_extension.flow_stats().await? 
+ }; + while let Some((flow_name, flow_id)) = stream .try_next() .await @@ -213,7 +235,7 @@ impl InformationSchemaFlowsBuilder { catalog_name: catalog_name.to_string(), flow_name: flow_name.to_string(), })?; - self.add_flow(&predicates, flow_id.flow_id(), flow_info)?; + self.add_flow(&predicates, flow_id.flow_id(), flow_info, &flow_stat)?; } self.finish() @@ -224,6 +246,7 @@ impl InformationSchemaFlowsBuilder { predicates: &Predicates, flow_id: FlowId, flow_info: FlowInfoValue, + flow_stat: &Option, ) -> Result<()> { let row = [ (FLOW_NAME, &Value::from(flow_info.flow_name().to_string())), @@ -238,6 +261,11 @@ impl InformationSchemaFlowsBuilder { } self.flow_names.push(Some(flow_info.flow_name())); self.flow_ids.push(Some(flow_id)); + self.state_sizes.push( + flow_stat + .as_ref() + .and_then(|state| state.state_size.get(&flow_id).map(|v| *v as u64)), + ); self.table_catalogs.push(Some(flow_info.catalog_name())); self.raw_sqls.push(Some(flow_info.raw_sql())); self.comments.push(Some(flow_info.comment())); @@ -270,6 +298,7 @@ impl InformationSchemaFlowsBuilder { let columns: Vec = vec![ Arc::new(self.flow_names.finish()), Arc::new(self.flow_ids.finish()), + Arc::new(self.state_sizes.finish()), Arc::new(self.table_catalogs.finish()), Arc::new(self.raw_sqls.finish()), Arc::new(self.comments.finish()), diff --git a/src/cli/src/repl.rs b/src/cli/src/repl.rs index 4c2ef8ffe396..8b5e3aa389a2 100644 --- a/src/cli/src/repl.rs +++ b/src/cli/src/repl.rs @@ -34,7 +34,7 @@ use common_query::Output; use common_recordbatch::RecordBatches; use common_telemetry::debug; use either::Either; -use meta_client::client::MetaClientBuilder; +use meta_client::client::{ClusterKvBackend, MetaClientBuilder}; use query::datafusion::DatafusionQueryEngine; use query::parser::QueryLanguageParser; use query::query_engine::{DefaultSerializer, QueryEngineState}; diff --git a/src/cmd/src/standalone.rs b/src/cmd/src/standalone.rs index d7e816166b03..8490e14147b2 100644 --- a/src/cmd/src/standalone.rs +++ b/src/cmd/src/standalone.rs @@ -34,6 +34,7 @@ use common_meta::ddl::flow_meta::{FlowMetadataAllocator, FlowMetadataAllocatorRe use common_meta::ddl::table_meta::{TableMetadataAllocator, TableMetadataAllocatorRef}; use common_meta::ddl::{DdlContext, NoopRegionFailureDetectorControl, ProcedureExecutorRef}; use common_meta::ddl_manager::DdlManager; +use common_meta::key::flow::flow_state::FlowStat; use common_meta::key::flow::{FlowMetadataManager, FlowMetadataManagerRef}; use common_meta::key::{TableMetadataManager, TableMetadataManagerRef}; use common_meta::kv_backend::KvBackendRef; @@ -70,7 +71,7 @@ use servers::http::HttpOptions; use servers::tls::{TlsMode, TlsOption}; use servers::Mode; use snafu::ResultExt; -use tokio::sync::broadcast; +use tokio::sync::{broadcast, RwLock}; use tracing_appender::non_blocking::WorkerGuard; use crate::error::{ @@ -507,7 +508,7 @@ impl StartCommand { procedure_manager.clone(), )); let catalog_manager = KvBackendCatalogManager::new( - information_extension, + information_extension.clone(), kv_backend.clone(), layered_cache_registry.clone(), Some(procedure_manager.clone()), @@ -532,6 +533,14 @@ impl StartCommand { .context(OtherSnafu)?, ); + // set the ref to query for the local flow state + { + let flow_worker_manager = flownode.flow_worker_manager(); + information_extension + .set_flow_worker_manager(flow_worker_manager.clone()) + .await; + } + let node_manager = Arc::new(StandaloneDatanodeManager { region_server: datanode.region_server(), flow_server: flownode.flow_worker_manager(), @@ -669,6 
+678,7 @@ pub struct StandaloneInformationExtension { region_server: RegionServer, procedure_manager: ProcedureManagerRef, start_time_ms: u64, + flow_worker_manager: RwLock>>, } impl StandaloneInformationExtension { @@ -677,8 +687,15 @@ impl StandaloneInformationExtension { region_server, procedure_manager, start_time_ms: common_time::util::current_time_millis() as u64, + flow_worker_manager: RwLock::new(None), } } + + /// Set the flow worker manager for the standalone instance. + pub async fn set_flow_worker_manager(&self, flow_worker_manager: Arc) { + let mut guard = self.flow_worker_manager.write().await; + *guard = Some(flow_worker_manager); + } } #[async_trait::async_trait] @@ -750,6 +767,18 @@ impl InformationExtension for StandaloneInformationExtension { .collect::>(); Ok(stats) } + + async fn flow_stats(&self) -> std::result::Result, Self::Error> { + Ok(Some( + self.flow_worker_manager + .read() + .await + .as_ref() + .unwrap() + .gen_state_report() + .await, + )) + } } #[cfg(test)] diff --git a/src/common/meta/src/key.rs b/src/common/meta/src/key.rs index 90b96f32dc9e..b6aa57d497cc 100644 --- a/src/common/meta/src/key.rs +++ b/src/common/meta/src/key.rs @@ -137,6 +137,7 @@ use self::schema_name::{SchemaManager, SchemaNameKey, SchemaNameValue}; use self::table_route::{TableRouteManager, TableRouteValue}; use self::tombstone::TombstoneManager; use crate::error::{self, Result, SerdeJsonSnafu}; +use crate::key::flow::flow_state::FlowStateValue; use crate::key::node_address::NodeAddressValue; use crate::key::table_route::TableRouteKey; use crate::key::txn_helper::TxnOpGetResponseSet; @@ -1262,7 +1263,8 @@ impl_metadata_value! { FlowRouteValue, TableFlowValue, NodeAddressValue, - SchemaNameValue + SchemaNameValue, + FlowStateValue } impl_optional_metadata_value! { diff --git a/src/common/meta/src/key/flow.rs b/src/common/meta/src/key/flow.rs index 9715aab1fde1..9023ca2ef83d 100644 --- a/src/common/meta/src/key/flow.rs +++ b/src/common/meta/src/key/flow.rs @@ -15,6 +15,7 @@ pub mod flow_info; pub(crate) mod flow_name; pub(crate) mod flow_route; +pub mod flow_state; pub(crate) mod flownode_flow; pub(crate) mod table_flow; @@ -35,6 +36,7 @@ use crate::ensure_values; use crate::error::{self, Result}; use crate::key::flow::flow_info::FlowInfoManager; use crate::key::flow::flow_name::FlowNameManager; +use crate::key::flow::flow_state::FlowStateManager; use crate::key::flow::flownode_flow::FlownodeFlowManager; pub use crate::key::flow::table_flow::{TableFlowManager, TableFlowManagerRef}; use crate::key::txn_helper::TxnOpGetResponseSet; @@ -102,6 +104,8 @@ pub struct FlowMetadataManager { flownode_flow_manager: FlownodeFlowManager, table_flow_manager: TableFlowManager, flow_name_manager: FlowNameManager, + /// only metasrv have access to itself's memory backend, so for other case it should be None + flow_state_manager: Option, kv_backend: KvBackendRef, } @@ -114,6 +118,7 @@ impl FlowMetadataManager { flow_name_manager: FlowNameManager::new(kv_backend.clone()), flownode_flow_manager: FlownodeFlowManager::new(kv_backend.clone()), table_flow_manager: TableFlowManager::new(kv_backend.clone()), + flow_state_manager: None, kv_backend, } } @@ -123,6 +128,10 @@ impl FlowMetadataManager { &self.flow_name_manager } + pub fn flow_state_manager(&self) -> Option<&FlowStateManager> { + self.flow_state_manager.as_ref() + } + /// Returns the [`FlowInfoManager`]. 
pub fn flow_info_manager(&self) -> &FlowInfoManager { &self.flow_info_manager diff --git a/src/common/meta/src/key/flow/flow_state.rs b/src/common/meta/src/key/flow/flow_state.rs new file mode 100644 index 000000000000..eeb4b06f0132 --- /dev/null +++ b/src/common/meta/src/key/flow/flow_state.rs @@ -0,0 +1,162 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::collections::BTreeMap; +use std::sync::Arc; + +use serde::{Deserialize, Serialize}; + +use crate::error::{self, Result}; +use crate::key::flow::FlowScoped; +use crate::key::{FlowId, MetadataKey, MetadataValue}; +use crate::kv_backend::KvBackendRef; +use crate::rpc::store::PutRequest; + +/// The entire FlowId to Flow Size's Map is stored directly in the value part of the key. +const FLOW_STATE_KEY: &str = "state"; + +/// The key of flow state. +#[derive(Debug, Clone, Copy, PartialEq)] +struct FlowStateKeyInner; + +impl FlowStateKeyInner { + pub fn new() -> Self { + Self + } +} + +impl<'a> MetadataKey<'a, FlowStateKeyInner> for FlowStateKeyInner { + fn to_bytes(&self) -> Vec { + FLOW_STATE_KEY.as_bytes().to_vec() + } + + fn from_bytes(bytes: &'a [u8]) -> Result { + let key = std::str::from_utf8(bytes).map_err(|e| { + error::InvalidMetadataSnafu { + err_msg: format!( + "FlowInfoKeyInner '{}' is not a valid UTF8 string: {e}", + String::from_utf8_lossy(bytes) + ), + } + .build() + })?; + if key != FLOW_STATE_KEY { + return Err(error::InvalidMetadataSnafu { + err_msg: format!("Invalid FlowStateKeyInner '{key}'"), + } + .build()); + } + Ok(FlowStateKeyInner::new()) + } +} + +/// The key stores the state size of the flow. +/// +/// The layout: `__flow/state`. +pub struct FlowStateKey(FlowScoped); + +impl FlowStateKey { + /// Returns the [FlowStateKey]. + pub fn new() -> FlowStateKey { + let inner = FlowStateKeyInner::new(); + FlowStateKey(FlowScoped::new(inner)) + } +} + +impl Default for FlowStateKey { + fn default() -> Self { + Self::new() + } +} + +impl<'a> MetadataKey<'a, FlowStateKey> for FlowStateKey { + fn to_bytes(&self) -> Vec { + self.0.to_bytes() + } + + fn from_bytes(bytes: &'a [u8]) -> Result { + Ok(FlowStateKey(FlowScoped::::from_bytes( + bytes, + )?)) + } +} + +/// The value of flow state size +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub struct FlowStateValue { + /// For each key, the bytes of the state in memory + pub state_size: BTreeMap, +} + +impl FlowStateValue { + pub fn new(state_size: BTreeMap) -> Self { + Self { state_size } + } +} + +pub type FlowStateManagerRef = Arc; + +/// The manager of [FlowStateKey]. Since state size changes frequently, we store it in memory. +/// +/// This is only used in distributed mode. When meta-srv use heartbeat to update the flow stat report +/// and frontned use get to get the latest flow stat report. 
+pub struct FlowStateManager { + in_memory: KvBackendRef, +} + +impl FlowStateManager { + pub fn new(in_memory: KvBackendRef) -> Self { + Self { in_memory } + } + + pub async fn get(&self) -> Result> { + let key = FlowStateKey::new().to_bytes(); + self.in_memory + .get(&key) + .await? + .map(|x| FlowStateValue::try_from_raw_value(&x.value)) + .transpose() + } + + pub async fn put(&self, value: FlowStateValue) -> Result<()> { + let key = FlowStateKey::new().to_bytes(); + let value = value.try_as_raw_value()?; + let req = PutRequest::new().with_key(key).with_value(value); + self.in_memory.put(req).await?; + Ok(()) + } +} + +/// Flow's state report, send regularly through heartbeat message +#[derive(Debug, Clone)] +pub struct FlowStat { + /// For each key, the bytes of the state in memory + pub state_size: BTreeMap, +} + +impl From for FlowStat { + fn from(value: FlowStateValue) -> Self { + Self { + state_size: value.state_size, + } + } +} + +impl From for FlowStateValue { + fn from(value: FlowStat) -> Self { + Self { + state_size: value.state_size, + } + } +} diff --git a/src/common/meta/src/kv_backend.rs b/src/common/meta/src/kv_backend.rs index ba9db2ec2a44..d4b2cf2ef381 100644 --- a/src/common/meta/src/kv_backend.rs +++ b/src/common/meta/src/kv_backend.rs @@ -36,7 +36,7 @@ pub mod postgres; pub mod test; pub mod txn; -pub type KvBackendRef = Arc + Send + Sync>; +pub type KvBackendRef = Arc + Send + Sync>; #[async_trait] pub trait KvBackend: TxnService @@ -161,6 +161,9 @@ where Self::Error: ErrorExt, { fn reset(&self); + + /// Upcast as `KvBackendRef`. Since https://github.com/rust-lang/rust/issues/65991 is not yet stable. + fn as_kv_backend_ref(self: Arc) -> KvBackendRef; } -pub type ResettableKvBackendRef = Arc + Send + Sync>; +pub type ResettableKvBackendRef = Arc + Send + Sync>; diff --git a/src/common/meta/src/kv_backend/memory.rs b/src/common/meta/src/kv_backend/memory.rs index 256e31f93ed3..9475a30001ce 100644 --- a/src/common/meta/src/kv_backend/memory.rs +++ b/src/common/meta/src/kv_backend/memory.rs @@ -16,13 +16,13 @@ use std::any::Any; use std::collections::BTreeMap; use std::fmt::{Display, Formatter}; use std::marker::PhantomData; -use std::sync::RwLock; +use std::sync::{Arc, RwLock}; use async_trait::async_trait; use common_error::ext::ErrorExt; use serde::Serializer; -use super::ResettableKvBackend; +use super::{KvBackendRef, ResettableKvBackend}; use crate::kv_backend::txn::{Txn, TxnOp, TxnOpResponse, TxnRequest, TxnResponse}; use crate::kv_backend::{KvBackend, TxnService}; use crate::metrics::METRIC_META_TXN_REQUEST; @@ -311,6 +311,10 @@ impl ResettableKvBackend for MemoryKvBacken fn reset(&self) { self.clear(); } + + fn as_kv_backend_ref(self: Arc) -> KvBackendRef { + self + } } #[cfg(test)] diff --git a/src/flow/Cargo.toml b/src/flow/Cargo.toml index ffba0618daaf..08867d342a74 100644 --- a/src/flow/Cargo.toml +++ b/src/flow/Cargo.toml @@ -40,6 +40,8 @@ datatypes.workspace = true enum-as-inner = "0.6.0" enum_dispatch = "0.3" futures = "0.3" +get-size-derive2 = "0.1.2" +get-size2 = "0.1.2" greptime-proto.workspace = true # This fork of hydroflow is simply for keeping our dependency in our org, and pin the version # otherwise it is the same with upstream repo diff --git a/src/flow/src/adapter.rs b/src/flow/src/adapter.rs index 7d9ae5e422d2..586eaa8e586a 100644 --- a/src/flow/src/adapter.rs +++ b/src/flow/src/adapter.rs @@ -60,6 +60,7 @@ use crate::repr::{self, DiffRow, Row, BATCH_SIZE}; mod flownode_impl; mod parse_expr; +mod stat; #[cfg(test)] mod tests; mod util; @@ -69,6 
+70,7 @@ pub(crate) mod node_context; mod table_source; use crate::error::Error; +use crate::utils::StateReportHandler; use crate::FrontendInvoker; // `GREPTIME_TIMESTAMP` is not used to distinguish when table is created automatically by flow @@ -137,6 +139,8 @@ pub struct FlowWorkerManager { /// /// So that a series of event like `inserts -> flush` can be handled correctly flush_lock: RwLock<()>, + /// receive a oneshot sender to send state size report + state_report_handler: RwLock>, } /// Building FlownodeManager @@ -170,9 +174,15 @@ impl FlowWorkerManager { tick_manager, node_id, flush_lock: RwLock::new(()), + state_report_handler: RwLock::new(None), } } + pub async fn with_state_report_handler(self, handler: StateReportHandler) -> Self { + *self.state_report_handler.write().await = Some(handler); + self + } + /// Create a flownode manager with one worker pub fn new_with_worker<'s>( node_id: Option, @@ -500,6 +510,27 @@ impl FlowWorkerManager { /// Flow Runtime related methods impl FlowWorkerManager { + /// Start state report handler, which will receive a sender from HeartbeatTask to send state size report back + /// + /// if heartbeat task is shutdown, this future will exit too + async fn start_state_report_handler(self: Arc) -> Option> { + let state_report_handler = self.state_report_handler.write().await.take(); + if let Some(mut handler) = state_report_handler { + let zelf = self.clone(); + let handler = common_runtime::spawn_global(async move { + while let Some(ret_handler) = handler.recv().await { + let state_report = zelf.gen_state_report().await; + ret_handler.send(state_report).unwrap_or_else(|err| { + common_telemetry::error!(err; "Send state size report error"); + }); + } + }); + Some(handler) + } else { + None + } + } + /// run in common_runtime background runtime pub fn run_background( self: Arc, @@ -507,6 +538,7 @@ impl FlowWorkerManager { ) -> JoinHandle<()> { info!("Starting flownode manager's background task"); common_runtime::spawn_global(async move { + let _state_report_handler = self.clone().start_state_report_handler().await; self.run(shutdown).await; }) } diff --git a/src/flow/src/adapter/stat.rs b/src/flow/src/adapter/stat.rs new file mode 100644 index 000000000000..c719e35f3ca9 --- /dev/null +++ b/src/flow/src/adapter/stat.rs @@ -0,0 +1,40 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use std::collections::BTreeMap; + +use common_meta::key::flow::flow_state::FlowStat; + +use crate::FlowWorkerManager; + +impl FlowWorkerManager { + pub async fn gen_state_report(&self) -> FlowStat { + let mut full_report = BTreeMap::new(); + for worker in self.worker_handles.iter() { + let worker = worker.lock().await; + match worker.get_state_size().await { + Ok(state_size) => { + full_report.extend(state_size.into_iter().map(|(k, v)| (k as u32, v))) + } + Err(err) => { + common_telemetry::error!(err; "Get flow stat size error"); + } + } + } + + FlowStat { + state_size: full_report, + } + } +} diff --git a/src/flow/src/adapter/worker.rs b/src/flow/src/adapter/worker.rs index 978d3c608cec..4a6b0ba963d9 100644 --- a/src/flow/src/adapter/worker.rs +++ b/src/flow/src/adapter/worker.rs @@ -197,6 +197,21 @@ impl WorkerHandle { .fail() } } + + pub async fn get_state_size(&self) -> Result, Error> { + let ret = self + .itc_client + .call_with_resp(Request::QueryStateSize) + .await?; + ret.into_query_state_size().map_err(|ret| { + InternalSnafu { + reason: format!( + "Flow Node/Worker itc failed, expect Response::QueryStateSize, found {ret:?}" + ), + } + .build() + }) + } } impl Drop for WorkerHandle { @@ -361,6 +376,13 @@ impl<'s> Worker<'s> { Some(Response::ContainTask { result: ret }) } Request::Shutdown => return Err(()), + Request::QueryStateSize => { + let mut ret = BTreeMap::new(); + for (flow_id, task_state) in self.task_states.iter() { + ret.insert(*flow_id, task_state.state.get_state_size()); + } + Some(Response::QueryStateSize { result: ret }) + } }; Ok(ret) } @@ -391,6 +413,7 @@ pub enum Request { flow_id: FlowId, }, Shutdown, + QueryStateSize, } #[derive(Debug, EnumAsInner)] @@ -406,6 +429,10 @@ enum Response { result: bool, }, RunAvail, + QueryStateSize { + /// each flow tasks' state size + result: BTreeMap, + }, } fn create_inter_thread_call() -> (InterThreadCallClient, InterThreadCallServer) { @@ -423,10 +450,12 @@ struct InterThreadCallClient { } impl InterThreadCallClient { + /// call without response fn call_no_resp(&self, req: Request) -> Result<(), Error> { self.arg_sender.send((req, None)).map_err(from_send_error) } + /// call with response async fn call_with_resp(&self, req: Request) -> Result { let (tx, rx) = oneshot::channel(); self.arg_sender @@ -527,6 +556,7 @@ mod test { ); tx.send(Batch::empty()).unwrap(); handle.run_available(0, true).await.unwrap(); + assert_eq!(handle.get_state_size().await.unwrap().len(), 1); assert_eq!(sink_rx.recv().await.unwrap(), Batch::empty()); drop(handle); worker_thread_handle.join().unwrap(); diff --git a/src/flow/src/compute/state.rs b/src/flow/src/compute/state.rs index d34b4a311d15..ee2c7628a28c 100644 --- a/src/flow/src/compute/state.rs +++ b/src/flow/src/compute/state.rs @@ -16,6 +16,7 @@ use std::cell::RefCell; use std::collections::{BTreeMap, VecDeque}; use std::rc::Rc; +use get_size2::GetSize; use hydroflow::scheduled::graph::Hydroflow; use hydroflow::scheduled::SubgraphId; @@ -109,6 +110,10 @@ impl DataflowState { pub fn expire_after(&self) -> Option { self.expire_after } + + pub fn get_state_size(&self) -> usize { + self.arrange_used.iter().map(|x| x.read().get_size()).sum() + } } #[derive(Debug, Clone)] diff --git a/src/flow/src/heartbeat.rs b/src/flow/src/heartbeat.rs index 96635e350dde..69159d1d2a01 100644 --- a/src/flow/src/heartbeat.rs +++ b/src/flow/src/heartbeat.rs @@ -24,6 +24,7 @@ use common_meta::heartbeat::handler::{ }; use common_meta::heartbeat::mailbox::{HeartbeatMailbox, MailboxRef, OutgoingMessage}; use 
common_meta::heartbeat::utils::outgoing_message_to_mailbox_message; +use common_meta::key::flow::flow_state::FlowStat; use common_telemetry::{debug, error, info, warn}; use greptime_proto::v1::meta::NodeInfo; use meta_client::client::{HeartbeatSender, HeartbeatStream, MetaClient}; @@ -34,8 +35,27 @@ use tokio::sync::mpsc; use tokio::time::Duration; use crate::error::ExternalSnafu; +use crate::utils::SizeReportSender; use crate::{Error, FlownodeOptions}; +async fn query_flow_state( + query_stat_size: &Option, + timeout: Duration, +) -> Option { + if let Some(report_requester) = query_stat_size.as_ref() { + let ret = report_requester.query(timeout).await; + match ret { + Ok(latest) => Some(latest), + Err(err) => { + error!(err; "Failed to get query stat size"); + None + } + } + } else { + None + } +} + /// The flownode heartbeat task which sending `[HeartbeatRequest]` to Metasrv periodically in background. #[derive(Clone)] pub struct HeartbeatTask { @@ -47,9 +67,14 @@ pub struct HeartbeatTask { resp_handler_executor: HeartbeatResponseHandlerExecutorRef, start_time_ms: u64, running: Arc, + query_stat_size: Option, } impl HeartbeatTask { + pub fn with_query_stat_size(mut self, query_stat_size: SizeReportSender) -> Self { + self.query_stat_size = Some(query_stat_size); + self + } pub fn new( opts: &FlownodeOptions, meta_client: Arc, @@ -65,6 +90,7 @@ impl HeartbeatTask { resp_handler_executor, start_time_ms: common_time::util::current_time_millis() as u64, running: Arc::new(AtomicBool::new(false)), + query_stat_size: None, } } @@ -112,6 +138,7 @@ impl HeartbeatTask { message: Option, peer: Option, start_time_ms: u64, + latest_report: &Option, ) -> Option { let mailbox_message = match message.map(outgoing_message_to_mailbox_message) { Some(Ok(message)) => Some(message), @@ -121,11 +148,22 @@ impl HeartbeatTask { } None => None, }; + let flow_stat = latest_report + .as_ref() + .map(|report| { + report + .state_size + .iter() + .map(|(k, v)| (*k, *v as u64)) + .collect() + }) + .map(|f| api::v1::meta::FlowStat { flow_stat_size: f }); Some(HeartbeatRequest { mailbox_message, peer, info: Self::build_node_info(start_time_ms), + flow_stat, ..Default::default() }) } @@ -151,24 +189,27 @@ impl HeartbeatTask { addr: self.peer_addr.clone(), }); + let query_stat_size = self.query_stat_size.clone(); + common_runtime::spawn_hb(async move { // note that using interval will cause it to first immediately send // a heartbeat let mut interval = tokio::time::interval(report_interval); interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); + let mut latest_report = None; loop { let req = tokio::select! 
{ message = outgoing_rx.recv() => { if let Some(message) = message { - Self::create_heartbeat_request(Some(message), self_peer.clone(), start_time_ms) + Self::create_heartbeat_request(Some(message), self_peer.clone(), start_time_ms, &latest_report) } else { // Receives None that means Sender was dropped, we need to break the current loop break } } _ = interval.tick() => { - Self::create_heartbeat_request(None, self_peer.clone(), start_time_ms) + Self::create_heartbeat_request(None, self_peer.clone(), start_time_ms, &latest_report) } }; @@ -180,6 +221,10 @@ impl HeartbeatTask { debug!("Send a heartbeat request to metasrv, content: {:?}", req); } } + // after sending heartbeat, try to get the latest report + // TODO(discord9): consider a better place to update the size report + // set the timeout to half of the report interval so that it wouldn't delay heartbeat if something went horribly wrong + latest_report = query_flow_state(&query_stat_size, report_interval / 2).await; } }); } diff --git a/src/flow/src/repr.rs b/src/flow/src/repr.rs index 1c257c8f3c6c..50f2a78ef8f9 100644 --- a/src/flow/src/repr.rs +++ b/src/flow/src/repr.rs @@ -22,12 +22,14 @@ use api::v1::Row as ProtoRow; use datatypes::data_type::ConcreteDataType; use datatypes::types::cast; use datatypes::value::Value; +use get_size2::GetSize; use itertools::Itertools; pub(crate) use relation::{ColumnType, Key, RelationDesc, RelationType}; use serde::{Deserialize, Serialize}; use snafu::ResultExt; use crate::expr::error::{CastValueSnafu, EvalError, InvalidArgumentSnafu}; +use crate::utils::get_value_heap_size; /// System-wide Record count difference type. Useful for capture data change /// @@ -105,6 +107,12 @@ pub struct Row { pub inner: Vec, } +impl GetSize for Row { + fn get_heap_size(&self) -> usize { + self.inner.iter().map(get_value_heap_size).sum() + } +} + impl Row { /// Create an empty row pub fn empty() -> Self { diff --git a/src/flow/src/server.rs b/src/flow/src/server.rs index 87b6bbdc09ed..1259c1175510 100644 --- a/src/flow/src/server.rs +++ b/src/flow/src/server.rs @@ -55,6 +55,7 @@ use crate::error::{ }; use crate::heartbeat::HeartbeatTask; use crate::transform::register_function_to_query_engine; +use crate::utils::{SizeReportSender, StateReportHandler}; use crate::{Error, FlowWorkerManager, FlownodeOptions}; pub const FLOW_NODE_SERVER_NAME: &str = "FLOW_NODE_SERVER"; @@ -236,6 +237,8 @@ pub struct FlownodeBuilder { catalog_manager: CatalogManagerRef, flow_metadata_manager: FlowMetadataManagerRef, heartbeat_task: Option, + /// receive a oneshot sender to send state size report + state_report_handler: Option, } impl FlownodeBuilder { @@ -254,17 +257,20 @@ impl FlownodeBuilder { catalog_manager, flow_metadata_manager, heartbeat_task: None, + state_report_handler: None, } } pub fn with_heartbeat_task(self, heartbeat_task: HeartbeatTask) -> Self { + let (sender, receiver) = SizeReportSender::new(); Self { - heartbeat_task: Some(heartbeat_task), + heartbeat_task: Some(heartbeat_task.with_query_stat_size(sender)), + state_report_handler: Some(receiver), ..self } } - pub async fn build(self) -> Result { + pub async fn build(mut self) -> Result { // TODO(discord9): does this query engine need those? let query_engine_factory = QueryEngineFactory::new_with_plugins( // query engine in flownode is only used for translate plan with resolved table source. @@ -383,7 +389,7 @@ impl FlownodeBuilder { /// build [`FlowWorkerManager`], note this doesn't take ownership of `self`, /// nor does it actually start running the worker. 
async fn build_manager( - &self, + &mut self, query_engine: Arc, ) -> Result { let table_meta = self.table_meta.clone(); @@ -402,12 +408,15 @@ impl FlownodeBuilder { info!("Flow Worker started in new thread"); worker.run(); }); - let man = rx.await.map_err(|_e| { + let mut man = rx.await.map_err(|_e| { UnexpectedSnafu { reason: "sender is dropped, failed to create flow node manager", } .build() })?; + if let Some(handler) = self.state_report_handler.take() { + man = man.with_state_report_handler(handler).await; + } info!("Flow Node Manager started"); Ok(man) } diff --git a/src/flow/src/utils.rs b/src/flow/src/utils.rs index 1cd5b3ba5c1c..5e01d0bfa423 100644 --- a/src/flow/src/utils.rs +++ b/src/flow/src/utils.rs @@ -18,16 +18,73 @@ use std::collections::{BTreeMap, BTreeSet}; use std::ops::Bound; use std::sync::Arc; +use common_meta::key::flow::flow_state::FlowStat; use common_telemetry::trace; +use datatypes::value::Value; +use get_size2::GetSize; use smallvec::{smallvec, SmallVec}; -use tokio::sync::RwLock; +use tokio::sync::{mpsc, oneshot, RwLock}; +use tokio::time::Instant; +use crate::error::InternalSnafu; use crate::expr::{EvalError, ScalarExpr}; use crate::repr::{value_to_internal_ts, DiffRow, Duration, KeyValDiffRow, Row, Timestamp}; /// A batch of updates, arranged by key pub type Batch = BTreeMap>; +/// Get a estimate of heap size of a value +pub fn get_value_heap_size(v: &Value) -> usize { + match v { + Value::Binary(bin) => bin.len(), + Value::String(s) => s.len(), + Value::List(list) => list.items().iter().map(get_value_heap_size).sum(), + _ => 0, + } +} + +#[derive(Clone)] +pub struct SizeReportSender { + inner: mpsc::Sender>, +} + +impl SizeReportSender { + pub fn new() -> (Self, StateReportHandler) { + let (tx, rx) = mpsc::channel(1); + let zelf = Self { inner: tx }; + (zelf, rx) + } + + /// Query the size report, will timeout after one second if no response + pub async fn query(&self, timeout: std::time::Duration) -> crate::Result { + let (tx, rx) = oneshot::channel(); + self.inner.send(tx).await.map_err(|_| { + InternalSnafu { + reason: "failed to send size report request due to receiver dropped", + } + .build() + })?; + let timeout = tokio::time::timeout(timeout, rx); + timeout + .await + .map_err(|_elapsed| { + InternalSnafu { + reason: "failed to receive size report after one second timeout", + } + .build() + })? + .map_err(|_| { + InternalSnafu { + reason: "failed to receive size report due to sender dropped", + } + .build() + }) + } +} + +/// Handle the size report request, and send the report back +pub type StateReportHandler = mpsc::Receiver>; + /// A spine of batches, arranged by timestamp /// TODO(discord9): consider internally index by key, value, and timestamp for faster lookup pub type Spine = BTreeMap; @@ -49,6 +106,24 @@ pub struct KeyExpiryManager { event_timestamp_from_row: Option, } +impl GetSize for KeyExpiryManager { + fn get_heap_size(&self) -> usize { + let row_size = if let Some(row_size) = &self + .event_ts_to_key + .first_key_value() + .map(|(_, v)| v.first().get_heap_size()) + { + *row_size + } else { + 0 + }; + self.event_ts_to_key + .values() + .map(|v| v.len() * row_size + std::mem::size_of::()) + .sum::() + } +} + impl KeyExpiryManager { pub fn new( key_expiration_duration: Option, @@ -154,7 +229,7 @@ impl KeyExpiryManager { /// /// Note the two way arrow between reduce operator and arrange, it's because reduce operator need to query existing state /// and also need to update existing state. 
-#[derive(Debug, Clone, Default, Eq, PartialEq, Ord, PartialOrd)] +#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)] pub struct Arrangement { /// A name or identifier for the arrangement which can be used for debugging or logging purposes. /// This field is not critical to the functionality but aids in monitoring and management of arrangements. @@ -196,6 +271,61 @@ pub struct Arrangement { /// The time that the last compaction happened, also known as the current time. last_compaction_time: Option, + + /// Estimated size of the arrangement in heap size. + estimated_size: usize, + last_size_update: Instant, + size_update_interval: tokio::time::Duration, +} + +impl Arrangement { + fn compute_size(&self) -> usize { + self.spine + .values() + .map(|v| { + let per_entry_size = v + .first_key_value() + .map(|(k, v)| { + k.get_heap_size() + + v.len() * v.first().map(|r| r.get_heap_size()).unwrap_or(0) + }) + .unwrap_or(0); + std::mem::size_of::() + v.len() * per_entry_size + }) + .sum::() + + self.expire_state.get_heap_size() + + self.name.get_heap_size() + } + + fn update_and_fetch_size(&mut self) -> usize { + if self.last_size_update.elapsed() > self.size_update_interval { + self.estimated_size = self.compute_size(); + self.last_size_update = Instant::now(); + } + self.estimated_size + } +} + +impl GetSize for Arrangement { + fn get_heap_size(&self) -> usize { + self.estimated_size + } +} + +impl Default for Arrangement { + fn default() -> Self { + Self { + spine: Default::default(), + full_arrangement: false, + is_written: false, + expire_state: None, + last_compaction_time: None, + name: Vec::new(), + estimated_size: 0, + last_size_update: Instant::now(), + size_update_interval: tokio::time::Duration::from_secs(3), + } + } } impl Arrangement { @@ -207,6 +337,9 @@ impl Arrangement { expire_state: None, last_compaction_time: None, name, + estimated_size: 0, + last_size_update: Instant::now(), + size_update_interval: tokio::time::Duration::from_secs(3), } } @@ -269,6 +402,7 @@ impl Arrangement { // without changing the order of updates within same tick key_updates.sort_by_key(|(_val, ts, _diff)| *ts); } + self.update_and_fetch_size(); Ok(max_expired_by) } @@ -390,6 +524,7 @@ impl Arrangement { // insert the compacted batch into spine with key being `now` self.spine.insert(now, compacting_batch); + self.update_and_fetch_size(); Ok(max_expired_by) } diff --git a/src/meta-client/src/client.rs b/src/meta-client/src/client.rs index d0008a7e81b1..ebe0e94e4861 100644 --- a/src/meta-client/src/client.rs +++ b/src/meta-client/src/client.rs @@ -25,6 +25,7 @@ use std::sync::Arc; use api::v1::meta::{ProcedureDetailResponse, Role}; use cluster::Client as ClusterClient; +pub use cluster::ClusterKvBackend; use common_error::ext::BoxedError; use common_grpc::channel_manager::{ChannelConfig, ChannelManager}; use common_meta::cluster::{ @@ -33,6 +34,8 @@ use common_meta::cluster::{ use common_meta::datanode::{DatanodeStatKey, DatanodeStatValue, RegionStat}; use common_meta::ddl::{ExecutorContext, ProcedureExecutor}; use common_meta::error::{self as meta_error, ExternalSnafu, Result as MetaResult}; +use common_meta::key::flow::flow_state::{FlowStat, FlowStateManager}; +use common_meta::kv_backend::KvBackendRef; use common_meta::range_stream::PaginationStream; use common_meta::rpc::ddl::{SubmitDdlTaskRequest, SubmitDdlTaskResponse}; use common_meta::rpc::procedure::{ @@ -54,7 +57,8 @@ use store::Client as StoreClient; pub use self::heartbeat::{HeartbeatSender, HeartbeatStream}; use crate::error::{ - 
ConvertMetaRequestSnafu, ConvertMetaResponseSnafu, Error, NotStartedSnafu, Result, + ConvertMetaRequestSnafu, ConvertMetaResponseSnafu, Error, GetFlowStatSnafu, NotStartedSnafu, + Result, }; pub type Id = (u64, u64); @@ -347,6 +351,15 @@ fn decode_stats(kv: KeyValue) -> MetaResult { } impl MetaClient { + pub async fn list_flow_stats(&self) -> Result> { + let cluster_backend = ClusterKvBackend::new(Arc::new(self.cluster_client()?)); + let cluster_backend = Arc::new(cluster_backend) as KvBackendRef; + let flow_state_manager = FlowStateManager::new(cluster_backend); + let res = flow_state_manager.get().await.context(GetFlowStatSnafu)?; + + Ok(res.map(|r| r.into())) + } + pub fn new(id: Id) -> Self { Self { id, diff --git a/src/meta-client/src/client/cluster.rs b/src/meta-client/src/client/cluster.rs index c7edbcc8d39e..d50ac9717cc8 100644 --- a/src/meta-client/src/client/cluster.rs +++ b/src/meta-client/src/client/cluster.rs @@ -40,8 +40,8 @@ use tonic::Status; use crate::client::ask_leader::AskLeader; use crate::client::{util, Id}; use crate::error::{ - ConvertMetaResponseSnafu, CreateChannelSnafu, Error, IllegalGrpcClientStateSnafu, Result, - RetryTimesExceededSnafu, + ConvertMetaResponseSnafu, CreateChannelSnafu, Error, IllegalGrpcClientStateSnafu, + ReadOnlyKvBackendSnafu, Result, RetryTimesExceededSnafu, }; #[derive(Clone, Debug)] @@ -308,3 +308,75 @@ impl Inner { .map(|res| (res.leader, res.followers)) } } + +/// A client for the cluster info. Read only and corresponding to +/// `in_memory` kvbackend in the meta-srv. +#[derive(Clone, Debug)] +pub struct ClusterKvBackend { + inner: Arc, +} + +impl ClusterKvBackend { + pub fn new(client: Arc) -> Self { + Self { inner: client } + } + + fn unimpl(&self) -> common_meta::error::Error { + let ret: common_meta::error::Result<()> = ReadOnlyKvBackendSnafu { + name: self.name().to_string(), + } + .fail() + .map_err(BoxedError::new) + .context(common_meta::error::ExternalSnafu); + ret.unwrap_err() + } +} + +impl TxnService for ClusterKvBackend { + type Error = common_meta::error::Error; +} + +#[async_trait::async_trait] +impl KvBackend for ClusterKvBackend { + fn name(&self) -> &str { + "ClusterKvBackend" + } + + fn as_any(&self) -> &dyn Any { + self + } + + async fn range(&self, req: RangeRequest) -> common_meta::error::Result { + self.inner + .range(req) + .await + .map_err(BoxedError::new) + .context(common_meta::error::ExternalSnafu) + } + + async fn batch_get(&self, _: BatchGetRequest) -> common_meta::error::Result { + Err(self.unimpl()) + } + + async fn put(&self, _: PutRequest) -> common_meta::error::Result { + Err(self.unimpl()) + } + + async fn batch_put(&self, _: BatchPutRequest) -> common_meta::error::Result { + Err(self.unimpl()) + } + + async fn delete_range( + &self, + _: DeleteRangeRequest, + ) -> common_meta::error::Result { + Err(self.unimpl()) + } + + async fn batch_delete( + &self, + _: BatchDeleteRequest, + ) -> common_meta::error::Result { + Err(self.unimpl()) + } +} diff --git a/src/meta-client/src/error.rs b/src/meta-client/src/error.rs index a4f8663368b4..be1cf150da0f 100644 --- a/src/meta-client/src/error.rs +++ b/src/meta-client/src/error.rs @@ -99,8 +99,22 @@ pub enum Error { source: common_meta::error::Error, }, + #[snafu(display("Failed to get flow stat"))] + GetFlowStat { + #[snafu(implicit)] + location: Location, + source: common_meta::error::Error, + }, + #[snafu(display("Retry exceeded max times({}), message: {}", times, msg))] RetryTimesExceeded { times: usize, msg: String }, + + #[snafu(display("Trying to write to 
a read-only kv backend: {}", name))] + ReadOnlyKvBackend { + name: String, + #[snafu(implicit)] + location: Location, + }, } #[allow(dead_code)] @@ -120,13 +134,15 @@ impl ErrorExt for Error { | Error::SendHeartbeat { .. } | Error::CreateHeartbeatStream { .. } | Error::CreateChannel { .. } - | Error::RetryTimesExceeded { .. } => StatusCode::Internal, + | Error::RetryTimesExceeded { .. } + | Error::ReadOnlyKvBackend { .. } => StatusCode::Internal, Error::MetaServer { code, .. } => *code, Error::InvalidResponseHeader { source, .. } | Error::ConvertMetaRequest { source, .. } - | Error::ConvertMetaResponse { source, .. } => source.status_code(), + | Error::ConvertMetaResponse { source, .. } + | Error::GetFlowStat { source, .. } => source.status_code(), } } } diff --git a/src/meta-srv/src/error.rs b/src/meta-srv/src/error.rs index 705f31ac49f4..1c529f06d606 100644 --- a/src/meta-srv/src/error.rs +++ b/src/meta-srv/src/error.rs @@ -716,6 +716,13 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Flow state handler error"))] + FlowStateHandler { + #[snafu(implicit)] + location: Location, + source: common_meta::error::Error, + }, } impl Error { @@ -761,7 +768,8 @@ impl ErrorExt for Error { | Error::Join { .. } | Error::PeerUnavailable { .. } | Error::ExceededDeadline { .. } - | Error::ChooseItems { .. } => StatusCode::Internal, + | Error::ChooseItems { .. } + | Error::FlowStateHandler { .. } => StatusCode::Internal, Error::Unsupported { .. } => StatusCode::Unsupported, diff --git a/src/meta-srv/src/handler.rs b/src/meta-srv/src/handler.rs index 3b4eb9a27935..ad1492cd7cdc 100644 --- a/src/meta-srv/src/handler.rs +++ b/src/meta-srv/src/handler.rs @@ -51,6 +51,7 @@ use tokio::sync::mpsc::Sender; use tokio::sync::{oneshot, Notify, RwLock}; use crate::error::{self, DeserializeFromJsonSnafu, Result, UnexpectedInstructionReplySnafu}; +use crate::handler::flow_state_handler::FlowStateHandler; use crate::metasrv::Context; use crate::metrics::{METRIC_META_HANDLER_EXECUTE, METRIC_META_HEARTBEAT_CONNECTION_NUM}; use crate::pubsub::PublisherRef; @@ -64,6 +65,7 @@ pub mod collect_stats_handler; pub mod extract_stat_handler; pub mod failure_handler; pub mod filter_inactive_region_stats; +pub mod flow_state_handler; pub mod keep_lease_handler; pub mod mailbox_handler; pub mod on_leader_start_handler; @@ -482,6 +484,8 @@ pub struct HeartbeatHandlerGroupBuilder { /// based on the number of received heartbeats. When the number of heartbeats /// reaches this factor, a flush operation is triggered. flush_stats_factor: Option, + /// A simple handler for flow internal state report + flow_state_handler: Option, /// The plugins. 
plugins: Option, @@ -499,12 +503,18 @@ impl HeartbeatHandlerGroupBuilder { region_failure_handler: None, region_lease_handler: None, flush_stats_factor: None, + flow_state_handler: None, plugins: None, pushers, handlers: vec![], } } + pub fn with_flow_state_handler(mut self, handler: Option) -> Self { + self.flow_state_handler = handler; + self + } + pub fn with_region_lease_handler(mut self, handler: Option) -> Self { self.region_lease_handler = handler; self @@ -564,6 +574,10 @@ impl HeartbeatHandlerGroupBuilder { } self.add_handler_last(CollectStatsHandler::new(self.flush_stats_factor)); + if let Some(flow_state_handler) = self.flow_state_handler.take() { + self.add_handler_last(flow_state_handler); + } + self } diff --git a/src/meta-srv/src/handler/flow_state_handler.rs b/src/meta-srv/src/handler/flow_state_handler.rs new file mode 100644 index 000000000000..ab387eb29185 --- /dev/null +++ b/src/meta-srv/src/handler/flow_state_handler.rs @@ -0,0 +1,58 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use api::v1::meta::{FlowStat, HeartbeatRequest, Role}; +use common_meta::key::flow::flow_state::{FlowStateManager, FlowStateValue}; +use snafu::ResultExt; + +use crate::error::{FlowStateHandlerSnafu, Result}; +use crate::handler::{HandleControl, HeartbeatAccumulator, HeartbeatHandler}; +use crate::metasrv::Context; + +pub struct FlowStateHandler { + flow_state_manager: FlowStateManager, +} + +impl FlowStateHandler { + pub fn new(flow_state_manager: FlowStateManager) -> Self { + Self { flow_state_manager } + } +} + +#[async_trait::async_trait] +impl HeartbeatHandler for FlowStateHandler { + fn is_acceptable(&self, role: Role) -> bool { + role == Role::Flownode + } + + async fn handle( + &self, + req: &HeartbeatRequest, + _ctx: &mut Context, + _acc: &mut HeartbeatAccumulator, + ) -> Result { + if let Some(FlowStat { flow_stat_size }) = &req.flow_stat { + let state_size = flow_stat_size + .iter() + .map(|(k, v)| (*k, *v as usize)) + .collect(); + let value = FlowStateValue::new(state_size); + self.flow_state_manager + .put(value) + .await + .context(FlowStateHandlerSnafu)?; + } + Ok(HandleControl::Continue) + } +} diff --git a/src/meta-srv/src/metasrv/builder.rs b/src/meta-srv/src/metasrv/builder.rs index 05344b482b06..0afaf004933d 100644 --- a/src/meta-srv/src/metasrv/builder.rs +++ b/src/meta-srv/src/metasrv/builder.rs @@ -26,6 +26,7 @@ use common_meta::ddl::{ }; use common_meta::ddl_manager::DdlManager; use common_meta::distributed_time_constants; +use common_meta::key::flow::flow_state::FlowStateManager; use common_meta::key::flow::FlowMetadataManager; use common_meta::key::maintenance::MaintenanceModeManager; use common_meta::key::TableMetadataManager; @@ -47,6 +48,7 @@ use crate::error::{self, Result}; use crate::flow_meta_alloc::FlowPeerAllocator; use crate::greptimedb_telemetry::get_greptimedb_telemetry_task; use crate::handler::failure_handler::RegionFailureHandler; +use crate::handler::flow_state_handler::FlowStateHandler; use 
crate::handler::region_lease_handler::RegionLeaseHandler; use crate::handler::{HeartbeatHandlerGroupBuilder, HeartbeatMailbox, Pushers}; use crate::lease::MetaPeerLookupService; @@ -228,6 +230,7 @@ impl MetasrvBuilder { peer_allocator, )) }); + let flow_metadata_allocator = { // for now flownode just use round-robin selector let flow_selector = RoundRobinSelector::new(SelectTarget::Flownode); @@ -248,6 +251,9 @@ impl MetasrvBuilder { peer_allocator, )) }; + let flow_state_handler = + FlowStateHandler::new(FlowStateManager::new(in_memory.clone().as_kv_backend_ref())); + let memory_region_keeper = Arc::new(MemoryRegionKeeper::default()); let node_manager = node_manager.unwrap_or_else(|| { let datanode_client_channel_config = ChannelConfig::new() @@ -350,6 +356,7 @@ impl MetasrvBuilder { .with_region_failure_handler(region_failover_handler) .with_region_lease_handler(Some(region_lease_handler)) .with_flush_stats_factor(Some(options.flush_stats_factor)) + .with_flow_state_handler(Some(flow_state_handler)) .add_default_handlers() } }; diff --git a/src/meta-srv/src/service/store/cached_kv.rs b/src/meta-srv/src/service/store/cached_kv.rs index d4b6f84f5802..0f90ecddea33 100644 --- a/src/meta-srv/src/service/store/cached_kv.rs +++ b/src/meta-srv/src/service/store/cached_kv.rs @@ -386,6 +386,10 @@ impl ResettableKvBackend for LeaderCachedKvBackend { fn reset(&self) { self.cache.reset() } + + fn as_kv_backend_ref(self: Arc) -> KvBackendRef { + self + } } #[cfg(test)] diff --git a/tests/cases/standalone/common/flow/flow_basic.result b/tests/cases/standalone/common/flow/flow_basic.result index 8ee6a90c83bf..fa360a6de684 100644 --- a/tests/cases/standalone/common/flow/flow_basic.result +++ b/tests/cases/standalone/common/flow/flow_basic.result @@ -1045,6 +1045,18 @@ FROM | svc1 | 200 | 2024-10-18T19:01:31 | +--------------+-----+---------------------+ +-- Test if FLOWS table works, but don't care about the result since it vary from runs +SELECT + count(CASE WHEN state_size > 0 THEN 1 ELSE 0 END) as active_flows, +FROM + INFORMATION_SCHEMA.FLOWS; + ++--------------+ +| active_flows | ++--------------+ +| 1 | ++--------------+ + DROP FLOW requests_long_term; Affected Rows: 0 diff --git a/tests/cases/standalone/common/flow/flow_basic.sql b/tests/cases/standalone/common/flow/flow_basic.sql index 43a42de4dd5f..8946c014be36 100644 --- a/tests/cases/standalone/common/flow/flow_basic.sql +++ b/tests/cases/standalone/common/flow/flow_basic.sql @@ -569,6 +569,12 @@ SELECT FROM requests_without_ip; +-- Test if FLOWS table works, but don't care about the result since it vary from runs +SELECT + count(CASE WHEN state_size > 0 THEN 1 ELSE 0 END) as active_flows, +FROM + INFORMATION_SCHEMA.FLOWS; + DROP FLOW requests_long_term; DROP TABLE requests_without_ip; diff --git a/tests/cases/standalone/common/system/information_schema.result b/tests/cases/standalone/common/system/information_schema.result index b1c8c9329514..a0dce0152296 100644 --- a/tests/cases/standalone/common/system/information_schema.result +++ b/tests/cases/standalone/common/system/information_schema.result @@ -187,16 +187,17 @@ select * from information_schema.columns order by table_schema, table_name, colu | greptime | information_schema | files | update_count | 13 | | | 19 | 0 | | | | | | select,insert | | Int64 | bigint | FIELD | | No | bigint | | | | greptime | information_schema | files | update_time | 34 | | | | | 3 | | | | | select,insert | | DateTime | datetime | FIELD | | No | datetime | | | | greptime | information_schema | files | version | 
25 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | -| greptime | information_schema | flows | comment | 5 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | -| greptime | information_schema | flows | expire_after | 6 | | | 19 | 0 | | | | | | select,insert | | Int64 | bigint | FIELD | | Yes | bigint | | | -| greptime | information_schema | flows | flow_definition | 4 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | +| greptime | information_schema | flows | comment | 6 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | +| greptime | information_schema | flows | expire_after | 7 | | | 19 | 0 | | | | | | select,insert | | Int64 | bigint | FIELD | | Yes | bigint | | | +| greptime | information_schema | flows | flow_definition | 5 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | information_schema | flows | flow_id | 2 | | | 10 | 0 | | | | | | select,insert | | UInt32 | int unsigned | FIELD | | No | int unsigned | | | | greptime | information_schema | flows | flow_name | 1 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | -| greptime | information_schema | flows | flownode_ids | 9 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | -| greptime | information_schema | flows | options | 10 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | -| greptime | information_schema | flows | sink_table_name | 8 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | -| greptime | information_schema | flows | source_table_ids | 7 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | -| greptime | information_schema | flows | table_catalog | 3 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | +| greptime | information_schema | flows | flownode_ids | 10 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | +| greptime | information_schema | flows | options | 11 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | +| greptime | information_schema | flows | sink_table_name | 9 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | +| greptime | information_schema | flows | source_table_ids | 8 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | Yes | string | | | +| greptime | information_schema | flows | state_size | 3 | | | 20 | 0 | | | | | | select,insert | | UInt64 | bigint unsigned | FIELD | | Yes | bigint unsigned | | | +| greptime | information_schema | flows | table_catalog | 4 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | information_schema | global_status | variable_name | 1 | 2147483647 | 2147483647 | | | | 
utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | information_schema | global_status | variable_value | 2 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | | greptime | information_schema | key_column_usage | column_name | 8 | 2147483647 | 2147483647 | | | | utf8 | utf8_bin | | | select,insert | | String | string | FIELD | | No | string | | | From 7ea8a44d3a61427f61529f76a0f7966abe928643 Mon Sep 17 00:00:00 2001 From: dennis zhuang Date: Thu, 19 Dec 2024 16:28:20 +0800 Subject: [PATCH 54/59] chore: update PR template (#5199) --- .github/pull_request_template.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index c50137f87681..5b0fe1bcfe2d 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -4,7 +4,8 @@ I hereby agree to the terms of the [GreptimeDB CLA](https://github.com/GreptimeT ## What's changed and what's your intention? -__!!! DO NOT LEAVE THIS BLOCK EMPTY !!!__ + -## Checklist +## PR Checklist +Please convert it to a draft if some of the following conditions are not met. - [ ] I have written the necessary rustdoc comments. - [ ] I have added the necessary unit tests and integration tests. - [ ] This PR requires documentation updates. +- [ ] API changes are backward compatible. +- [ ] Schema or data changes are backward compatible. From a4d61bcaf188e88ebcaface8ee899ea2e6eb99bb Mon Sep 17 00:00:00 2001 From: discord9 <55937128+discord9@users.noreply.github.com> Date: Thu, 19 Dec 2024 17:16:56 +0800 Subject: [PATCH 55/59] fix(flow): batch builder with type (#5195) * fix: typed builder * chore: clippy * chore: rename * fix: unit tests * refactor: per review --- src/flow/src/adapter.rs | 22 ++- src/flow/src/adapter/flownode_impl.rs | 13 +- src/flow/src/adapter/node_context.rs | 23 ++- src/flow/src/adapter/util.rs | 15 ++ src/flow/src/compute/render.rs | 23 ++- src/flow/src/compute/render/map.rs | 3 +- src/flow/src/compute/render/reduce.rs | 35 ++-- src/flow/src/expr.rs | 37 +++- src/flow/src/expr/linear.rs | 70 +++++-- .../standalone/common/flow/flow_basic.result | 178 +++++++++++++++++- .../standalone/common/flow/flow_basic.sql | 79 +++++++- 11 files changed, 434 insertions(+), 64 deletions(-) diff --git a/src/flow/src/adapter.rs b/src/flow/src/adapter.rs index 586eaa8e586a..6d70377cf2aa 100644 --- a/src/flow/src/adapter.rs +++ b/src/flow/src/adapter.rs @@ -565,6 +565,8 @@ impl FlowWorkerManager { let default_interval = Duration::from_secs(1); let mut avg_spd = 0; // rows/sec let mut since_last_run = tokio::time::Instant::now(); + let run_per_trace = 10; + let mut run_cnt = 0; loop { // TODO(discord9): only run when new inputs arrive or scheduled to let row_cnt = self.run_available(true).await.unwrap_or_else(|err| { @@ -607,10 +609,19 @@ impl FlowWorkerManager { } else { (9 * avg_spd + cur_spd) / 10 }; - trace!("avg_spd={} r/s, cur_spd={} r/s", avg_spd, cur_spd); let new_wait = BATCH_SIZE * 1000 / avg_spd.max(1); //in ms let new_wait = Duration::from_millis(new_wait as u64).min(default_interval); - trace!("Wait for {} ms, row_cnt={}", new_wait.as_millis(), row_cnt); + + // print trace every `run_per_trace` times so that we can see if there is something wrong + // but also not get flooded with trace + if run_cnt >= run_per_trace { + trace!("avg_spd={} r/s, cur_spd={} r/s", avg_spd, cur_spd); + trace!("Wait for {} ms, row_cnt={}", 
new_wait.as_millis(), row_cnt); + run_cnt = 0; + } else { + run_cnt += 1; + } + METRIC_FLOW_RUN_INTERVAL_MS.set(new_wait.as_millis() as i64); since_last_run = tokio::time::Instant::now(); tokio::time::sleep(new_wait).await; @@ -670,13 +681,18 @@ impl FlowWorkerManager { &self, region_id: RegionId, rows: Vec, + batch_datatypes: &[ConcreteDataType], ) -> Result<(), Error> { let rows_len = rows.len(); let table_id = region_id.table_id(); let _timer = METRIC_FLOW_INSERT_ELAPSED .with_label_values(&[table_id.to_string().as_str()]) .start_timer(); - self.node_context.read().await.send(table_id, rows).await?; + self.node_context + .read() + .await + .send(table_id, rows, batch_datatypes) + .await?; trace!( "Handling write request for table_id={} with {} rows", table_id, diff --git a/src/flow/src/adapter/flownode_impl.rs b/src/flow/src/adapter/flownode_impl.rs index 3841d08914c5..1fa11b4d83a2 100644 --- a/src/flow/src/adapter/flownode_impl.rs +++ b/src/flow/src/adapter/flownode_impl.rs @@ -28,6 +28,7 @@ use itertools::Itertools; use snafu::{OptionExt, ResultExt}; use store_api::storage::RegionId; +use super::util::from_proto_to_data_type; use crate::adapter::{CreateFlowArgs, FlowWorkerManager}; use crate::error::InternalSnafu; use crate::metrics::METRIC_FLOW_TASK_COUNT; @@ -206,9 +207,17 @@ impl Flownode for FlowWorkerManager { }) .map(|r| (r, now, 1)) .collect_vec(); - self.handle_write_request(region_id.into(), rows) - .await + let batch_datatypes = insert_schema + .iter() + .map(from_proto_to_data_type) + .collect::, _>>() .map_err(to_meta_err)?; + self.handle_write_request(region_id.into(), rows, &batch_datatypes) + .await + .map_err(|err| { + common_telemetry::error!(err;"Failed to handle write request"); + to_meta_err(err) + })?; } Ok(Default::default()) } diff --git a/src/flow/src/adapter/node_context.rs b/src/flow/src/adapter/node_context.rs index 26e1a6483ab8..990fdd129797 100644 --- a/src/flow/src/adapter/node_context.rs +++ b/src/flow/src/adapter/node_context.rs @@ -19,6 +19,7 @@ use std::sync::atomic::{AtomicUsize, Ordering}; use std::sync::Arc; use common_telemetry::trace; +use datatypes::prelude::ConcreteDataType; use session::context::QueryContext; use snafu::{OptionExt, ResultExt}; use table::metadata::TableId; @@ -131,7 +132,11 @@ impl SourceSender { } /// return number of rows it actual send(including what's in the buffer) - pub async fn send_rows(&self, rows: Vec) -> Result { + pub async fn send_rows( + &self, + rows: Vec, + batch_datatypes: &[ConcreteDataType], + ) -> Result { METRIC_FLOW_INPUT_BUF_SIZE.add(rows.len() as _); while self.send_buf_row_cnt.load(Ordering::SeqCst) >= BATCH_SIZE * 4 { tokio::task::yield_now().await; @@ -139,8 +144,11 @@ impl SourceSender { // row count metrics is approx so relaxed order is ok self.send_buf_row_cnt .fetch_add(rows.len(), Ordering::SeqCst); - let batch = Batch::try_from_rows(rows.into_iter().map(|(row, _, _)| row).collect()) - .context(EvalSnafu)?; + let batch = Batch::try_from_rows_with_types( + rows.into_iter().map(|(row, _, _)| row).collect(), + batch_datatypes, + ) + .context(EvalSnafu)?; common_telemetry::trace!("Send one batch to worker with {} rows", batch.row_count()); self.send_buf_tx.send(batch).await.map_err(|e| { crate::error::InternalSnafu { @@ -157,14 +165,19 @@ impl FlownodeContext { /// return number of rows it actual send(including what's in the buffer) /// /// TODO(discord9): make this concurrent - pub async fn send(&self, table_id: TableId, rows: Vec) -> Result { + pub async fn send( + &self, + table_id: TableId, + 
rows: Vec, + batch_datatypes: &[ConcreteDataType], + ) -> Result { let sender = self .source_sender .get(&table_id) .with_context(|| TableNotFoundSnafu { name: table_id.to_string(), })?; - sender.send_rows(rows).await + sender.send_rows(rows, batch_datatypes).await } /// flush all sender's buf diff --git a/src/flow/src/adapter/util.rs b/src/flow/src/adapter/util.rs index a1d2895ba3be..f2a29bec8e9e 100644 --- a/src/flow/src/adapter/util.rs +++ b/src/flow/src/adapter/util.rs @@ -16,12 +16,27 @@ use api::helper::ColumnDataTypeWrapper; use api::v1::column_def::options_from_column_schema; use api::v1::{ColumnDataType, ColumnDataTypeExtension, SemanticType}; use common_error::ext::BoxedError; +use datatypes::prelude::ConcreteDataType; use datatypes::schema::ColumnSchema; use itertools::Itertools; use snafu::ResultExt; use crate::error::{Error, ExternalSnafu}; +pub fn from_proto_to_data_type( + column_schema: &api::v1::ColumnSchema, +) -> Result { + let wrapper = ColumnDataTypeWrapper::try_new( + column_schema.datatype, + column_schema.datatype_extension.clone(), + ) + .map_err(BoxedError::new) + .context(ExternalSnafu)?; + let cdt = ConcreteDataType::from(wrapper); + + Ok(cdt) +} + /// convert `ColumnSchema` lists to it's corresponding proto type pub fn column_schemas_to_proto( column_schemas: Vec, diff --git a/src/flow/src/compute/render.rs b/src/flow/src/compute/render.rs index 94f00a182921..46ac7e8a1d5e 100644 --- a/src/flow/src/compute/render.rs +++ b/src/flow/src/compute/render.rs @@ -30,7 +30,7 @@ use crate::compute::types::{Collection, CollectionBundle, ErrCollector, Toff}; use crate::error::{Error, InvalidQuerySnafu, NotImplementedSnafu}; use crate::expr::{self, Batch, GlobalId, LocalId}; use crate::plan::{Plan, TypedPlan}; -use crate::repr::{self, DiffRow}; +use crate::repr::{self, DiffRow, RelationType}; mod map; mod reduce; @@ -124,10 +124,10 @@ impl Context<'_, '_> { /// Like `render_plan` but in Batch Mode pub fn render_plan_batch(&mut self, plan: TypedPlan) -> Result, Error> { match plan.plan { - Plan::Constant { rows } => Ok(self.render_constant_batch(rows)), + Plan::Constant { rows } => Ok(self.render_constant_batch(rows, &plan.schema.typ)), Plan::Get { id } => self.get_batch_by_id(id), Plan::Let { id, value, body } => self.eval_batch_let(id, value, body), - Plan::Mfp { input, mfp } => self.render_mfp_batch(input, mfp), + Plan::Mfp { input, mfp } => self.render_mfp_batch(input, mfp, &plan.schema.typ), Plan::Reduce { input, key_val_plan, @@ -172,7 +172,11 @@ impl Context<'_, '_> { /// render Constant, take all rows that have a timestamp not greater than the current time /// This function is primarily used for testing /// Always assume input is sorted by timestamp - pub fn render_constant_batch(&mut self, rows: Vec) -> CollectionBundle { + pub fn render_constant_batch( + &mut self, + rows: Vec, + output_type: &RelationType, + ) -> CollectionBundle { let (send_port, recv_port) = self.df.make_edge::<_, Toff>("constant_batch"); let mut per_time: BTreeMap> = Default::default(); for (key, group) in &rows.into_iter().group_by(|(_row, ts, _diff)| *ts) { @@ -185,6 +189,8 @@ impl Context<'_, '_> { let scheduler_inner = scheduler.clone(); let err_collector = self.err_collector.clone(); + let output_type = output_type.clone(); + let subgraph_id = self.df .add_subgraph_source("ConstantBatch", send_port, move |_ctx, send_port| { @@ -199,7 +205,14 @@ impl Context<'_, '_> { not_great_than_now.into_iter().for_each(|(_ts, rows)| { err_collector.run(|| { let rows = rows.into_iter().map(|(row, _ts, 
_diff)| row).collect(); - let batch = Batch::try_from_rows(rows)?; + let batch = Batch::try_from_rows_with_types( + rows, + &output_type + .column_types + .iter() + .map(|ty| ty.scalar_type().clone()) + .collect_vec(), + )?; send_port.give(vec![batch]); Ok(()) }); diff --git a/src/flow/src/compute/render/map.rs b/src/flow/src/compute/render/map.rs index 416652328401..059e93fe3956 100644 --- a/src/flow/src/compute/render/map.rs +++ b/src/flow/src/compute/render/map.rs @@ -25,7 +25,7 @@ use crate::compute::types::{Arranged, Collection, CollectionBundle, ErrCollector use crate::error::{Error, PlanSnafu}; use crate::expr::{Batch, EvalError, MapFilterProject, MfpPlan, ScalarExpr}; use crate::plan::TypedPlan; -use crate::repr::{self, DiffRow, KeyValDiffRow, Row}; +use crate::repr::{self, DiffRow, KeyValDiffRow, RelationType, Row}; use crate::utils::ArrangeHandler; impl Context<'_, '_> { @@ -34,6 +34,7 @@ impl Context<'_, '_> { &mut self, input: Box, mfp: MapFilterProject, + _output_type: &RelationType, ) -> Result, Error> { let input = self.render_plan_batch(*input)?; diff --git a/src/flow/src/compute/render/reduce.rs b/src/flow/src/compute/render/reduce.rs index ee20f8ca783f..1d0689c4032f 100644 --- a/src/flow/src/compute/render/reduce.rs +++ b/src/flow/src/compute/render/reduce.rs @@ -87,6 +87,8 @@ impl Context<'_, '_> { })?; let key_val_plan = key_val_plan.clone(); + let output_type = output_type.clone(); + let now = self.compute_state.current_time_ref(); let err_collector = self.err_collector.clone(); @@ -118,6 +120,7 @@ impl Context<'_, '_> { src_data, &key_val_plan, &accum_plan, + &output_type, SubgraphArg { now, err_collector: &err_collector, @@ -354,6 +357,7 @@ fn reduce_batch_subgraph( src_data: impl IntoIterator, key_val_plan: &KeyValPlan, accum_plan: &AccumulablePlan, + output_type: &RelationType, SubgraphArg { now, err_collector, @@ -535,17 +539,13 @@ fn reduce_batch_subgraph( // this output part is not supposed to be resource intensive // (because for every batch there wouldn't usually be as many output row?), // so we can do some costly operation here - let output_types = all_output_dict.first_entry().map(|entry| { - entry - .key() - .iter() - .chain(entry.get().iter()) - .map(|v| v.data_type()) - .collect::>() - }); + let output_types = output_type + .column_types + .iter() + .map(|t| t.scalar_type.clone()) + .collect_vec(); - if let Some(output_types) = output_types { - err_collector.run(|| { + err_collector.run(|| { let column_cnt = output_types.len(); let row_cnt = all_output_dict.len(); @@ -585,7 +585,6 @@ fn reduce_batch_subgraph( Ok(()) }); - } } /// reduce subgraph, reduce the input data into a single row @@ -1516,7 +1515,9 @@ mod test { let mut ctx = harness_test_ctx(&mut df, &mut state); let rows = vec![ - (Row::new(vec![1i64.into()]), 1, 1), + (Row::new(vec![Value::Null]), -1, 1), + (Row::new(vec![1i64.into()]), 0, 1), + (Row::new(vec![Value::Null]), 1, 1), (Row::new(vec![2i64.into()]), 2, 1), (Row::new(vec![3i64.into()]), 3, 1), (Row::new(vec![1i64.into()]), 4, 1), @@ -1558,13 +1559,15 @@ mod test { Box::new(input_plan.with_types(typ.into_unnamed())), &key_val_plan, &reduce_plan, - &RelationType::empty(), + &RelationType::new(vec![ColumnType::new(CDT::int64_datatype(), true)]), ) .unwrap(); { let now_inner = now.clone(); let expected = BTreeMap::>::from([ + (-1, vec![]), + (0, vec![1i64]), (1, vec![1i64]), (2, vec![3i64]), (3, vec![6i64]), @@ -1581,7 +1584,11 @@ mod test { if let Some(expected) = expected.get(&now) { let batch = expected.iter().map(|v| 
Value::from(*v)).collect_vec(); - let batch = Batch::try_from_rows(vec![batch.into()]).unwrap(); + let batch = Batch::try_from_rows_with_types( + vec![batch.into()], + &[CDT::int64_datatype()], + ) + .unwrap(); assert_eq!(res.first(), Some(&batch)); } }); diff --git a/src/flow/src/expr.rs b/src/flow/src/expr.rs index 2e6019ba4ca7..5dde62b43a69 100644 --- a/src/flow/src/expr.rs +++ b/src/flow/src/expr.rs @@ -24,7 +24,7 @@ mod scalar; mod signature; use arrow::compute::FilterBuilder; -use datatypes::prelude::DataType; +use datatypes::prelude::{ConcreteDataType, DataType}; use datatypes::value::Value; use datatypes::vectors::{BooleanVector, Helper, VectorRef}; pub(crate) use df_func::{DfScalarFunction, RawDfScalarFn}; @@ -85,16 +85,18 @@ impl Default for Batch { } impl Batch { - pub fn try_from_rows(rows: Vec) -> Result { + /// Get batch from rows, will try best to determine data type + pub fn try_from_rows_with_types( + rows: Vec, + batch_datatypes: &[ConcreteDataType], + ) -> Result { if rows.is_empty() { return Ok(Self::empty()); } let len = rows.len(); - let mut builder = rows - .first() - .unwrap() + let mut builder = batch_datatypes .iter() - .map(|v| v.data_type().create_mutable_vector(len)) + .map(|ty| ty.create_mutable_vector(len)) .collect_vec(); for row in rows { ensure!( @@ -221,10 +223,25 @@ impl Batch { return Ok(()); } - let dts = if self.batch.is_empty() { - other.batch.iter().map(|v| v.data_type()).collect_vec() - } else { - self.batch.iter().map(|v| v.data_type()).collect_vec() + let dts = { + let max_len = self.batch.len().max(other.batch.len()); + let mut dts = Vec::with_capacity(max_len); + for i in 0..max_len { + if let Some(v) = self.batch().get(i) + && !v.data_type().is_null() + { + dts.push(v.data_type()) + } else if let Some(v) = other.batch().get(i) + && !v.data_type().is_null() + { + dts.push(v.data_type()) + } else { + // both are null, so we will push null type + dts.push(datatypes::prelude::ConcreteDataType::null_datatype()) + } + } + + dts }; let batch_builders = dts diff --git a/src/flow/src/expr/linear.rs b/src/flow/src/expr/linear.rs index 373e467aba1b..f96d7827b6bd 100644 --- a/src/flow/src/expr/linear.rs +++ b/src/flow/src/expr/linear.rs @@ -908,20 +908,33 @@ mod test { .unwrap() .unwrap(); assert_eq!(ret, Row::pack(vec![Value::from(false), Value::from(true)])); - + let ty = [ + ConcreteDataType::int32_datatype(), + ConcreteDataType::int32_datatype(), + ConcreteDataType::int32_datatype(), + ]; // batch mode - let mut batch = Batch::try_from_rows(vec![Row::from(vec![ - Value::from(4), - Value::from(2), - Value::from(3), - ])]) + let mut batch = Batch::try_from_rows_with_types( + vec![Row::from(vec![ + Value::from(4), + Value::from(2), + Value::from(3), + ])], + &ty, + ) .unwrap(); let ret = safe_mfp.eval_batch_into(&mut batch).unwrap(); assert_eq!( ret, - Batch::try_from_rows(vec![Row::from(vec![Value::from(false), Value::from(true)])]) - .unwrap() + Batch::try_from_rows_with_types( + vec![Row::from(vec![Value::from(false), Value::from(true)])], + &[ + ConcreteDataType::boolean_datatype(), + ConcreteDataType::boolean_datatype(), + ], + ) + .unwrap() ); } @@ -956,7 +969,15 @@ mod test { .unwrap(); assert_eq!(ret, None); - let mut input1_batch = Batch::try_from_rows(vec![Row::new(input1)]).unwrap(); + let input_type = [ + ConcreteDataType::int32_datatype(), + ConcreteDataType::int32_datatype(), + ConcreteDataType::int32_datatype(), + ConcreteDataType::string_datatype(), + ]; + + let mut input1_batch = + Batch::try_from_rows_with_types(vec![Row::new(input1)], 
&input_type).unwrap(); let ret_batch = safe_mfp.eval_batch_into(&mut input1_batch).unwrap(); assert_eq!( ret_batch, @@ -974,7 +995,8 @@ mod test { .unwrap(); assert_eq!(ret, Some(Row::pack(vec![Value::from(11)]))); - let mut input2_batch = Batch::try_from_rows(vec![Row::new(input2)]).unwrap(); + let mut input2_batch = + Batch::try_from_rows_with_types(vec![Row::new(input2)], &input_type).unwrap(); let ret_batch = safe_mfp.eval_batch_into(&mut input2_batch).unwrap(); assert_eq!( ret_batch, @@ -1027,7 +1049,14 @@ mod test { let ret = safe_mfp.evaluate_into(&mut input1.clone(), &mut Row::empty()); assert!(matches!(ret, Err(EvalError::InvalidArgument { .. }))); - let mut input1_batch = Batch::try_from_rows(vec![Row::new(input1)]).unwrap(); + let input_type = [ + ConcreteDataType::int64_datatype(), + ConcreteDataType::int32_datatype(), + ConcreteDataType::int32_datatype(), + ConcreteDataType::int32_datatype(), + ]; + let mut input1_batch = + Batch::try_from_rows_with_types(vec![Row::new(input1)], &input_type).unwrap(); let ret_batch = safe_mfp.eval_batch_into(&mut input1_batch); assert!(matches!(ret_batch, Err(EvalError::InvalidArgument { .. }))); @@ -1037,7 +1066,13 @@ mod test { .unwrap(); assert_eq!(ret, Some(Row::new(input2.clone()))); - let input2_batch = Batch::try_from_rows(vec![Row::new(input2)]).unwrap(); + let input_type = [ + ConcreteDataType::int64_datatype(), + ConcreteDataType::int32_datatype(), + ConcreteDataType::int32_datatype(), + ]; + let input2_batch = + Batch::try_from_rows_with_types(vec![Row::new(input2)], &input_type).unwrap(); let ret_batch = safe_mfp.eval_batch_into(&mut input2_batch.clone()).unwrap(); assert_eq!(ret_batch, input2_batch); @@ -1047,7 +1082,8 @@ mod test { .unwrap(); assert_eq!(ret, None); - let input3_batch = Batch::try_from_rows(vec![Row::new(input3)]).unwrap(); + let input3_batch = + Batch::try_from_rows_with_types(vec![Row::new(input3)], &input_type).unwrap(); let ret_batch = safe_mfp.eval_batch_into(&mut input3_batch.clone()).unwrap(); assert_eq!( ret_batch, @@ -1083,7 +1119,13 @@ mod test { let ret = safe_mfp.evaluate_into(&mut input1.clone(), &mut Row::empty()); assert_eq!(ret.unwrap(), Some(Row::new(vec![Value::from(false)]))); - let mut input1_batch = Batch::try_from_rows(vec![Row::new(input1)]).unwrap(); + let input_type = [ + ConcreteDataType::int32_datatype(), + ConcreteDataType::int32_datatype(), + ConcreteDataType::int32_datatype(), + ]; + let mut input1_batch = + Batch::try_from_rows_with_types(vec![Row::new(input1)], &input_type).unwrap(); let ret_batch = safe_mfp.eval_batch_into(&mut input1_batch).unwrap(); assert_eq!( diff --git a/tests/cases/standalone/common/flow/flow_basic.result b/tests/cases/standalone/common/flow/flow_basic.result index fa360a6de684..c70fe54fec19 100644 --- a/tests/cases/standalone/common/flow/flow_basic.result +++ b/tests/cases/standalone/common/flow/flow_basic.result @@ -390,6 +390,65 @@ GROUP BY Affected Rows: 0 +INSERT INTO + bytes_log +VALUES + (NULL, '2023-01-01 00:00:01'), + (300, '2023-01-01 00:00:29'); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('find_approx_rate'); + ++--------------------------------------+ +| ADMIN FLUSH_FLOW('find_approx_rate') | ++--------------------------------------+ +| FLOW_FLUSHED | ++--------------------------------------+ + +SELECT + rate, + time_window +FROM + approx_rate; + ++------+---------------------+ +| rate | time_window | ++------+---------------------+ +| 0.0 | 
2023-01-01T00:00:00 | ++------+---------------------+ + +INSERT INTO + bytes_log +VALUES + (NULL, '2022-01-01 00:00:01'), + (NULL, '2022-01-01 00:00:29'); + +Affected Rows: 2 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('find_approx_rate'); + ++--------------------------------------+ +| ADMIN FLUSH_FLOW('find_approx_rate') | ++--------------------------------------+ +| FLOW_FLUSHED | ++--------------------------------------+ + +SELECT + rate, + time_window +FROM + approx_rate; + ++------+---------------------+ +| rate | time_window | ++------+---------------------+ +| | 2022-01-01T00:00:00 | +| 0.0 | 2023-01-01T00:00:00 | ++------+---------------------+ + INSERT INTO bytes_log VALUES @@ -416,6 +475,8 @@ FROM +-------------------+---------------------+ | rate | time_window | +-------------------+---------------------+ +| | 2022-01-01T00:00:00 | +| 0.0 | 2023-01-01T00:00:00 | | 6.633333333333334 | 2025-01-01T00:00:00 | +-------------------+---------------------+ @@ -445,6 +506,8 @@ FROM +--------------------+---------------------+ | rate | time_window | +--------------------+---------------------+ +| | 2022-01-01T00:00:00 | +| 0.0 | 2023-01-01T00:00:00 | | 6.633333333333334 | 2025-01-01T00:00:00 | | 1.6666666666666667 | 2025-01-01T00:00:30 | +--------------------+---------------------+ @@ -992,6 +1055,7 @@ CREATE TABLE requests_without_ip ( service_name STRING, val INT, ts TIMESTAMP TIME INDEX, + PRIMARY KEY(service_name) ); Affected Rows: 0 @@ -1009,12 +1073,12 @@ Affected Rows: 0 INSERT INTO requests VALUES - ("svc1", "10.0.0.1", 100, "2024-10-18 19:00:00"), + (NULL, "10.0.0.1", 100, "2024-10-18 19:00:00"), ("svc1", "10.0.0.2", 100, "2024-10-18 19:00:00"), - ("svc1", "10.0.0.1", 200, "2024-10-18 19:00:30"), + (NULL, "10.0.0.1", 200, "2024-10-18 19:00:30"), ("svc1", "10.0.0.2", 200, "2024-10-18 19:00:30"), - ("svc1", "10.0.0.1", 300, "2024-10-18 19:01:00"), - ("svc1", "10.0.0.2", 100, "2024-10-18 19:01:01"), + (NULL, "10.0.0.1", 300, "2024-10-18 19:01:00"), + (NULL, "10.0.0.2", 100, "2024-10-18 19:01:01"), ("svc1", "10.0.0.1", 400, "2024-10-18 19:01:30"), ("svc1", "10.0.0.2", 200, "2024-10-18 19:01:31"); @@ -1037,10 +1101,12 @@ FROM +--------------+-----+---------------------+ | service_name | val | ts | +--------------+-----+---------------------+ +| | 100 | 2024-10-18T19:00:00 | +| | 200 | 2024-10-18T19:00:30 | +| | 300 | 2024-10-18T19:01:00 | +| | 100 | 2024-10-18T19:01:01 | | svc1 | 100 | 2024-10-18T19:00:00 | | svc1 | 200 | 2024-10-18T19:00:30 | -| svc1 | 300 | 2024-10-18T19:01:00 | -| svc1 | 100 | 2024-10-18T19:01:01 | | svc1 | 400 | 2024-10-18T19:01:30 | | svc1 | 200 | 2024-10-18T19:01:31 | +--------------+-----+---------------------+ @@ -1057,6 +1123,106 @@ FROM | 1 | +--------------+ +INSERT INTO + requests +VALUES + (null, "10.0.0.1", 100, "2024-10-19 19:00:00"), + (null, "10.0.0.2", 100, "2024-10-19 19:00:00"), + (null, "10.0.0.1", 200, "2024-10-19 19:00:30"), + (null, "10.0.0.2", 200, "2024-10-19 19:00:30"), + (null, "10.0.0.1", 300, "2024-10-19 19:01:00"), + (null, "10.0.0.2", 100, "2024-10-19 19:01:01"), + (null, "10.0.0.1", 400, "2024-10-19 19:01:30"), + (null, "10.0.0.2", 200, "2024-10-19 19:01:31"); + +Affected Rows: 8 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('requests_long_term'); + ++----------------------------------------+ +| ADMIN FLUSH_FLOW('requests_long_term') | ++----------------------------------------+ +| 
FLOW_FLUSHED | ++----------------------------------------+ + +SELECT + * +FROM + requests_without_ip; + ++--------------+-----+---------------------+ +| service_name | val | ts | ++--------------+-----+---------------------+ +| | 100 | 2024-10-18T19:00:00 | +| | 200 | 2024-10-18T19:00:30 | +| | 300 | 2024-10-18T19:01:00 | +| | 100 | 2024-10-18T19:01:01 | +| | 100 | 2024-10-19T19:00:00 | +| | 200 | 2024-10-19T19:00:30 | +| | 300 | 2024-10-19T19:01:00 | +| | 100 | 2024-10-19T19:01:01 | +| | 400 | 2024-10-19T19:01:30 | +| | 200 | 2024-10-19T19:01:31 | +| svc1 | 100 | 2024-10-18T19:00:00 | +| svc1 | 200 | 2024-10-18T19:00:30 | +| svc1 | 400 | 2024-10-18T19:01:30 | +| svc1 | 200 | 2024-10-18T19:01:31 | ++--------------+-----+---------------------+ + +INSERT INTO + requests +VALUES + ("svc2", "10.0.0.1", 100, "2024-10-18 19:00:00"), + ("svc2", "10.0.0.2", 100, "2024-10-18 19:00:00"), + ("svc2", "10.0.0.1", 200, "2024-10-18 19:00:30"), + ("svc2", "10.0.0.2", 200, "2024-10-18 19:00:30"), + ("svc2", "10.0.0.1", 300, "2024-10-18 19:01:00"), + ("svc2", "10.0.0.2", 100, "2024-10-18 19:01:01"), + ("svc2", "10.0.0.1", 400, "2024-10-18 19:01:30"), + ("svc2", "10.0.0.2", 200, "2024-10-18 19:01:31"); + +Affected Rows: 8 + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('requests_long_term'); + ++----------------------------------------+ +| ADMIN FLUSH_FLOW('requests_long_term') | ++----------------------------------------+ +| FLOW_FLUSHED | ++----------------------------------------+ + +SELECT + * +FROM + requests_without_ip; + ++--------------+-----+---------------------+ +| service_name | val | ts | ++--------------+-----+---------------------+ +| | 100 | 2024-10-18T19:00:00 | +| | 200 | 2024-10-18T19:00:30 | +| | 300 | 2024-10-18T19:01:00 | +| | 100 | 2024-10-18T19:01:01 | +| | 100 | 2024-10-19T19:00:00 | +| | 200 | 2024-10-19T19:00:30 | +| | 300 | 2024-10-19T19:01:00 | +| | 100 | 2024-10-19T19:01:01 | +| | 400 | 2024-10-19T19:01:30 | +| | 200 | 2024-10-19T19:01:31 | +| svc1 | 100 | 2024-10-18T19:00:00 | +| svc1 | 200 | 2024-10-18T19:00:30 | +| svc1 | 400 | 2024-10-18T19:01:30 | +| svc1 | 200 | 2024-10-18T19:01:31 | +| svc2 | 100 | 2024-10-18T19:00:00 | +| svc2 | 200 | 2024-10-18T19:00:30 | +| svc2 | 300 | 2024-10-18T19:01:00 | +| svc2 | 100 | 2024-10-18T19:01:01 | +| svc2 | 400 | 2024-10-18T19:01:30 | +| svc2 | 200 | 2024-10-18T19:01:31 | ++--------------+-----+---------------------+ + DROP FLOW requests_long_term; Affected Rows: 0 diff --git a/tests/cases/standalone/common/flow/flow_basic.sql b/tests/cases/standalone/common/flow/flow_basic.sql index 8946c014be36..74abbc85df22 100644 --- a/tests/cases/standalone/common/flow/flow_basic.sql +++ b/tests/cases/standalone/common/flow/flow_basic.sql @@ -214,6 +214,36 @@ from GROUP BY time_window; +INSERT INTO + bytes_log +VALUES + (NULL, '2023-01-01 00:00:01'), + (300, '2023-01-01 00:00:29'); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('find_approx_rate'); + +SELECT + rate, + time_window +FROM + approx_rate; + +INSERT INTO + bytes_log +VALUES + (NULL, '2022-01-01 00:00:01'), + (NULL, '2022-01-01 00:00:29'); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('find_approx_rate'); + +SELECT + rate, + time_window +FROM + approx_rate; + INSERT INTO bytes_log VALUES @@ -539,6 +569,7 @@ CREATE TABLE requests_without_ip ( service_name STRING, val INT, ts 
TIMESTAMP TIME INDEX, + PRIMARY KEY(service_name) ); CREATE FLOW requests_long_term SINK TO requests_without_ip AS @@ -552,12 +583,12 @@ FROM INSERT INTO requests VALUES - ("svc1", "10.0.0.1", 100, "2024-10-18 19:00:00"), + (NULL, "10.0.0.1", 100, "2024-10-18 19:00:00"), ("svc1", "10.0.0.2", 100, "2024-10-18 19:00:00"), - ("svc1", "10.0.0.1", 200, "2024-10-18 19:00:30"), + (NULL, "10.0.0.1", 200, "2024-10-18 19:00:30"), ("svc1", "10.0.0.2", 200, "2024-10-18 19:00:30"), - ("svc1", "10.0.0.1", 300, "2024-10-18 19:01:00"), - ("svc1", "10.0.0.2", 100, "2024-10-18 19:01:01"), + (NULL, "10.0.0.1", 300, "2024-10-18 19:01:00"), + (NULL, "10.0.0.2", 100, "2024-10-18 19:01:01"), ("svc1", "10.0.0.1", 400, "2024-10-18 19:01:30"), ("svc1", "10.0.0.2", 200, "2024-10-18 19:01:31"); @@ -575,6 +606,46 @@ SELECT FROM INFORMATION_SCHEMA.FLOWS; +INSERT INTO + requests +VALUES + (null, "10.0.0.1", 100, "2024-10-19 19:00:00"), + (null, "10.0.0.2", 100, "2024-10-19 19:00:00"), + (null, "10.0.0.1", 200, "2024-10-19 19:00:30"), + (null, "10.0.0.2", 200, "2024-10-19 19:00:30"), + (null, "10.0.0.1", 300, "2024-10-19 19:01:00"), + (null, "10.0.0.2", 100, "2024-10-19 19:01:01"), + (null, "10.0.0.1", 400, "2024-10-19 19:01:30"), + (null, "10.0.0.2", 200, "2024-10-19 19:01:31"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('requests_long_term'); + +SELECT + * +FROM + requests_without_ip; + +INSERT INTO + requests +VALUES + ("svc2", "10.0.0.1", 100, "2024-10-18 19:00:00"), + ("svc2", "10.0.0.2", 100, "2024-10-18 19:00:00"), + ("svc2", "10.0.0.1", 200, "2024-10-18 19:00:30"), + ("svc2", "10.0.0.2", 200, "2024-10-18 19:00:30"), + ("svc2", "10.0.0.1", 300, "2024-10-18 19:01:00"), + ("svc2", "10.0.0.2", 100, "2024-10-18 19:01:01"), + ("svc2", "10.0.0.1", 400, "2024-10-18 19:01:30"), + ("svc2", "10.0.0.2", 200, "2024-10-18 19:01:31"); + +-- SQLNESS REPLACE (ADMIN\sFLUSH_FLOW\('\w+'\)\s+\|\n\+-+\+\n\|\s+)[0-9]+\s+\| $1 FLOW_FLUSHED | +ADMIN FLUSH_FLOW('requests_long_term'); + +SELECT + * +FROM + requests_without_ip; + DROP FLOW requests_long_term; DROP TABLE requests_without_ip; From 6bf574f098cac4950ab52eaf6b11a50caf14824b Mon Sep 17 00:00:00 2001 From: discord9 <55937128+discord9@users.noreply.github.com> Date: Thu, 19 Dec 2024 19:23:01 +0800 Subject: [PATCH 56/59] fix: auto created table ttl check (#5203) * fix: auto created table ttl check * tests: with hint --- src/operator/src/insert.rs | 6 ++++++ tests-integration/tests/grpc.rs | 34 +++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/src/operator/src/insert.rs b/src/operator/src/insert.rs index ec01b329457f..466dde5425c1 100644 --- a/src/operator/src/insert.rs +++ b/src/operator/src/insert.rs @@ -576,6 +576,9 @@ impl Inserter { for table in tables { let table_info = table.table_info(); + if table_info.is_ttl_instant_table() { + instant_table_ids.insert(table_info.table_id()); + } table_name_to_ids.insert(table_info.name.clone(), table_info.table_id()); } } @@ -596,6 +599,9 @@ impl Inserter { .create_physical_table(create_table, ctx, statement_executor) .await?; let table_info = table.table_info(); + if table_info.is_ttl_instant_table() { + instant_table_ids.insert(table_info.table_id()); + } table_name_to_ids.insert(table_info.name.clone(), table_info.table_id()); } for alter_expr in alter_tables.into_iter() { diff --git a/tests-integration/tests/grpc.rs b/tests-integration/tests/grpc.rs index 8b91ed55d520..74c8a6c0f73d 100644 --- a/tests-integration/tests/grpc.rs +++ 
b/tests-integration/tests/grpc.rs @@ -444,6 +444,40 @@ async fn insert_with_hints_and_assert(db: &Database) { +-------+-------------------------------------+\ "; assert_eq!(pretty, expected); + + // testing data with ttl=instant and auto_create_table = true can be handled correctly + let (expected_host_col, expected_cpu_col, expected_mem_col, expected_ts_col) = expect_data(); + + let request = InsertRequest { + table_name: "demo1".to_string(), + columns: vec![ + expected_host_col.clone(), + expected_cpu_col.clone(), + expected_mem_col.clone(), + expected_ts_col.clone(), + ], + row_count: 4, + }; + let result = db + .insert_with_hints( + InsertRequests { + inserts: vec![request], + }, + &[("auto_create_table", "true"), ("ttl", "instant")], + ) + .await; + assert_eq!(result.unwrap(), 0); + + // check table is empty + let output = db.sql("SELECT * FROM demo1").await.unwrap(); + + let record_batches = match output.data { + OutputData::RecordBatches(record_batches) => record_batches, + OutputData::Stream(stream) => RecordBatches::try_collect(stream).await.unwrap(), + OutputData::AffectedRows(_) => unreachable!(), + }; + + assert!(record_batches.iter().all(|r| r.num_rows() == 0)); } async fn insert_and_assert(db: &Database) { From a578eea801459100596fa9d5be7752cccddaddec Mon Sep 17 00:00:00 2001 From: LFC <990479+MichaelScofield@users.noreply.github.com> Date: Fri, 20 Dec 2024 10:45:53 +0800 Subject: [PATCH 57/59] ci: install latest protobuf in dev-builder image (#5196) --- docker/dev-builder/ubuntu/Dockerfile | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/docker/dev-builder/ubuntu/Dockerfile b/docker/dev-builder/ubuntu/Dockerfile index 733bfdab62f5..9f16161ac0c2 100644 --- a/docker/dev-builder/ubuntu/Dockerfile +++ b/docker/dev-builder/ubuntu/Dockerfile @@ -15,8 +15,8 @@ RUN apt-get update && \ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ libssl-dev \ tzdata \ - protobuf-compiler \ curl \ + unzip \ ca-certificates \ git \ build-essential \ @@ -24,6 +24,20 @@ RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ python3.10 \ python3.10-dev +ARG TARGETPLATFORM +RUN echo "target platform: $TARGETPLATFORM" + +# Install protobuf, because the one in the apt is too old (v3.12). +RUN if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \ + curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v29.1/protoc-29.1-linux-aarch_64.zip && \ + unzip protoc-29.1-linux-aarch_64.zip -d protoc3; \ +elif [ "$TARGETPLATFORM" = "linux/amd64" ]; then \ + curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v29.1/protoc-29.1-linux-x86_64.zip && \ + unzip protoc-29.1-linux-x86_64.zip -d protoc3; \ +fi +RUN mv protoc3/bin/* /usr/local/bin/ +RUN mv protoc3/include/* /usr/local/include/ + # https://github.com/GreptimeTeam/greptimedb/actions/runs/10935485852/job/30357457188#step:3:7106 # `aws-lc-sys` require gcc >= 10.3.0 to work, hence alias to use gcc-10 RUN apt-get remove -y gcc-9 g++-9 cpp-9 && \ @@ -49,7 +63,7 @@ RUN apt-get -y purge python3.8 && \ # wildcard here. However, that requires the git's config files and the submodules all owned by the very same user. # It's troublesome to do this since the dev build runs in Docker, which is under user "root"; while outside the Docker, # it can be a different user that have prepared the submodules. -RUN git config --global --add safe.directory * +RUN git config --global --add safe.directory '*' # Install Python dependencies. 
COPY $DOCKER_BUILD_ROOT/docker/python/requirements.txt /etc/greptime/requirements.txt From d9394774580f470703b419f88a10de76df98435c Mon Sep 17 00:00:00 2001 From: evenyag Date: Fri, 20 Dec 2024 12:50:30 +0800 Subject: [PATCH 58/59] chore: Downgrade opendal for releasing 0.11.1 Revert "feat: bump opendal and switch prometheus layer to the upstream impl (#5179)" This reverts commit 422d18da8bbdaba3b3a9b93bea6ef9bc3b76ab2f. --- Cargo.lock | 25 +- src/common/datasource/src/object_store/fs.rs | 2 +- src/common/datasource/src/object_store/s3.rs | 2 +- src/common/procedure/src/local/runner.rs | 8 +- src/datanode/src/error.rs | 15 +- src/datanode/src/store.rs | 5 +- src/file-engine/src/manifest.rs | 2 +- src/file-engine/src/region.rs | 6 +- src/metric-engine/src/test_util.rs | 4 +- src/mito2/src/cache/file_cache.rs | 4 +- src/mito2/src/engine/create_test.rs | 4 +- src/mito2/src/engine/drop_test.rs | 12 +- src/mito2/src/engine/open_test.rs | 4 +- src/mito2/src/manifest/tests/checkpoint.rs | 2 - src/mito2/src/sst/file_purger.rs | 6 +- src/mito2/src/worker/handle_open.rs | 2 +- src/object-store/Cargo.toml | 3 +- src/object-store/src/layers.rs | 33 +- .../src/layers/lru_cache/read_cache.rs | 9 +- src/object-store/src/layers/prometheus.rs | 584 ++++++++++++++++++ src/object-store/src/util.rs | 49 +- src/object-store/tests/object_store_test.rs | 61 +- 22 files changed, 708 insertions(+), 134 deletions(-) create mode 100644 src/object-store/src/layers/prometheus.rs diff --git a/Cargo.lock b/Cargo.lock index c23acf60636d..7f38d0d8b183 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -896,6 +896,18 @@ dependencies = [ "rand", ] +[[package]] +name = "backon" +version = "0.4.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d67782c3f868daa71d3533538e98a8e13713231969def7536e8039606fc46bf0" +dependencies = [ + "fastrand", + "futures-core", + "pin-project", + "tokio", +] + [[package]] name = "backon" version = "1.2.0" @@ -2252,7 +2264,7 @@ version = "0.12.0" dependencies = [ "async-stream", "async-trait", - "backon", + "backon 1.2.0", "common-base", "common-error", "common-macro", @@ -7469,13 +7481,13 @@ checksum = "b410bbe7e14ab526a0e86877eb47c6996a2bd7746f027ba551028c925390e4e9" [[package]] name = "opendal" -version = "0.50.2" +version = "0.49.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cb28bb6c64e116ceaf8dd4e87099d3cfea4a58e85e62b104fef74c91afba0f44" +checksum = "9b04d09b9822c2f75a1d2fc513a2c1279c70e91e7407936fffdf6a6976ec530a" dependencies = [ "anyhow", "async-trait", - "backon", + "backon 0.4.4", "base64 0.22.1", "bytes", "chrono", @@ -7488,7 +7500,6 @@ dependencies = [ "md-5", "once_cell", "percent-encoding", - "prometheus", "quick-xml 0.36.2", "reqsign", "reqwest", @@ -9504,9 +9515,9 @@ dependencies = [ [[package]] name = "reqsign" -version = "0.16.1" +version = "0.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb0075a66c8bfbf4cc8b70dca166e722e1f55a3ea9250ecbb85f4d92a5f64149" +checksum = "03dd4ba7c3901dd43e6b8c7446a760d45bc1ea4301002e1a6fa48f97c3a796fa" dependencies = [ "anyhow", "async-trait", diff --git a/src/common/datasource/src/object_store/fs.rs b/src/common/datasource/src/object_store/fs.rs index 5ffbbfa3148a..f87311f517b7 100644 --- a/src/common/datasource/src/object_store/fs.rs +++ b/src/common/datasource/src/object_store/fs.rs @@ -27,7 +27,7 @@ pub fn build_fs_backend(root: &str) -> Result { DefaultLoggingInterceptor, )) .layer(object_store::layers::TracingLayer) - 
.layer(object_store::layers::build_prometheus_metrics_layer(true)) + .layer(object_store::layers::PrometheusMetricsLayer::new(true)) .finish(); Ok(object_store) } diff --git a/src/common/datasource/src/object_store/s3.rs b/src/common/datasource/src/object_store/s3.rs index 0d83eb7a98b8..e141621b899b 100644 --- a/src/common/datasource/src/object_store/s3.rs +++ b/src/common/datasource/src/object_store/s3.rs @@ -89,7 +89,7 @@ pub fn build_s3_backend( DefaultLoggingInterceptor, )) .layer(object_store::layers::TracingLayer) - .layer(object_store::layers::build_prometheus_metrics_layer(true)) + .layer(object_store::layers::PrometheusMetricsLayer::new(true)) .finish()) } diff --git a/src/common/procedure/src/local/runner.rs b/src/common/procedure/src/local/runner.rs index bf277a0e72e5..c2d15001fba3 100644 --- a/src/common/procedure/src/local/runner.rs +++ b/src/common/procedure/src/local/runner.rs @@ -544,7 +544,7 @@ mod tests { use common_test_util::temp_dir::create_temp_dir; use futures_util::future::BoxFuture; use futures_util::FutureExt; - use object_store::{EntryMode, ObjectStore}; + use object_store::ObjectStore; use tokio::sync::mpsc; use super::*; @@ -578,11 +578,7 @@ mod tests { ) { let dir = proc_path!(procedure_store, "{procedure_id}/"); let lister = object_store.list(&dir).await.unwrap(); - let mut files_in_dir: Vec<_> = lister - .into_iter() - .filter(|x| x.metadata().mode() == EntryMode::FILE) - .map(|de| de.name().to_string()) - .collect(); + let mut files_in_dir: Vec<_> = lister.into_iter().map(|de| de.name().to_string()).collect(); files_in_dir.sort_unstable(); assert_eq!(files, files_in_dir); } diff --git a/src/datanode/src/error.rs b/src/datanode/src/error.rs index 61a4eae12883..9fbd46e16009 100644 --- a/src/datanode/src/error.rs +++ b/src/datanode/src/error.rs @@ -193,14 +193,6 @@ pub enum Error { location: Location, }, - #[snafu(display("Failed to build http client"))] - BuildHttpClient { - #[snafu(implicit)] - location: Location, - #[snafu(source)] - error: reqwest::Error, - }, - #[snafu(display("Missing required field: {}", name))] MissingRequiredField { name: String, @@ -414,10 +406,9 @@ impl ErrorExt for Error { | MissingKvBackend { .. } | TomlFormat { .. } => StatusCode::InvalidArguments, - PayloadNotExist { .. } - | Unexpected { .. } - | WatchAsyncTaskChange { .. } - | BuildHttpClient { .. } => StatusCode::Unexpected, + PayloadNotExist { .. } | Unexpected { .. } | WatchAsyncTaskChange { .. } => { + StatusCode::Unexpected + } AsyncTaskExecute { source, .. 
} => source.status_code(), diff --git a/src/datanode/src/store.rs b/src/datanode/src/store.rs index 52a1cba982e1..c78afe448e0c 100644 --- a/src/datanode/src/store.rs +++ b/src/datanode/src/store.rs @@ -32,7 +32,7 @@ use object_store::{Access, Error, HttpClient, ObjectStore, ObjectStoreBuilder, O use snafu::prelude::*; use crate::config::{HttpClientConfig, ObjectStoreConfig, DEFAULT_OBJECT_STORE_CACHE_SIZE}; -use crate::error::{self, BuildHttpClientSnafu, CreateDirSnafu, Result}; +use crate::error::{self, CreateDirSnafu, Result}; pub(crate) async fn new_raw_object_store( store: &ObjectStoreConfig, @@ -236,8 +236,7 @@ pub(crate) fn build_http_client(config: &HttpClientConfig) -> Result builder.timeout(config.timeout) }; - let client = http_builder.build().context(BuildHttpClientSnafu)?; - Ok(HttpClient::with(client)) + HttpClient::build(http_builder).context(error::InitBackendSnafu) } struct PrintDetailedError; diff --git a/src/file-engine/src/manifest.rs b/src/file-engine/src/manifest.rs index 6bf5ee104ba2..6310c3ccb912 100644 --- a/src/file-engine/src/manifest.rs +++ b/src/file-engine/src/manifest.rs @@ -46,7 +46,7 @@ impl FileRegionManifest { pub async fn store(&self, region_dir: &str, object_store: &ObjectStore) -> Result<()> { let path = ®ion_manifest_path(region_dir); let exist = object_store - .exists(path) + .is_exist(path) .await .context(CheckObjectSnafu { path })?; ensure!(!exist, ManifestExistsSnafu { path }); diff --git a/src/file-engine/src/region.rs b/src/file-engine/src/region.rs index 673d352b1e63..a5af6822285e 100644 --- a/src/file-engine/src/region.rs +++ b/src/file-engine/src/region.rs @@ -130,7 +130,7 @@ mod tests { assert_eq!(region.metadata.primary_key, vec![1]); assert!(object_store - .exists("create_region_dir/manifest/_file_manifest") + .is_exist("create_region_dir/manifest/_file_manifest") .await .unwrap()); @@ -198,13 +198,13 @@ mod tests { .unwrap(); assert!(object_store - .exists("drop_region_dir/manifest/_file_manifest") + .is_exist("drop_region_dir/manifest/_file_manifest") .await .unwrap()); FileRegion::drop(®ion, &object_store).await.unwrap(); assert!(!object_store - .exists("drop_region_dir/manifest/_file_manifest") + .is_exist("drop_region_dir/manifest/_file_manifest") .await .unwrap()); diff --git a/src/metric-engine/src/test_util.rs b/src/metric-engine/src/test_util.rs index d0f8cf5028e6..c5f7a2b4a32c 100644 --- a/src/metric-engine/src/test_util.rs +++ b/src/metric-engine/src/test_util.rs @@ -313,12 +313,12 @@ mod test { let region_dir = "test_metric_region"; // assert metadata region's dir let metadata_region_dir = join_dir(region_dir, METADATA_REGION_SUBDIR); - let exist = object_store.exists(&metadata_region_dir).await.unwrap(); + let exist = object_store.is_exist(&metadata_region_dir).await.unwrap(); assert!(exist); // assert data region's dir let data_region_dir = join_dir(region_dir, DATA_REGION_SUBDIR); - let exist = object_store.exists(&data_region_dir).await.unwrap(); + let exist = object_store.is_exist(&data_region_dir).await.unwrap(); assert!(exist); // check mito engine diff --git a/src/mito2/src/cache/file_cache.rs b/src/mito2/src/cache/file_cache.rs index eb112530cad7..9e5742ca0410 100644 --- a/src/mito2/src/cache/file_cache.rs +++ b/src/mito2/src/cache/file_cache.rs @@ -286,7 +286,7 @@ impl FileCache { } async fn get_reader(&self, file_path: &str) -> object_store::Result> { - if self.local_store.exists(file_path).await? { + if self.local_store.is_exist(file_path).await? 
{ Ok(Some(self.local_store.reader(file_path).await?)) } else { Ok(None) @@ -480,7 +480,7 @@ mod tests { cache.memory_index.run_pending_tasks().await; // The file also not exists. - assert!(!local_store.exists(&file_path).await.unwrap()); + assert!(!local_store.is_exist(&file_path).await.unwrap()); assert_eq!(0, cache.memory_index.weighted_size()); } diff --git a/src/mito2/src/engine/create_test.rs b/src/mito2/src/engine/create_test.rs index 4bcc55934034..48b04dc86d91 100644 --- a/src/mito2/src/engine/create_test.rs +++ b/src/mito2/src/engine/create_test.rs @@ -192,12 +192,12 @@ async fn test_engine_create_with_custom_store() { assert!(object_store_manager .find("Gcs") .unwrap() - .exists(region_dir) + .is_exist(region_dir) .await .unwrap()); assert!(!object_store_manager .default_object_store() - .exists(region_dir) + .is_exist(region_dir) .await .unwrap()); } diff --git a/src/mito2/src/engine/drop_test.rs b/src/mito2/src/engine/drop_test.rs index 5d0c5afbf06e..7d719f778be9 100644 --- a/src/mito2/src/engine/drop_test.rs +++ b/src/mito2/src/engine/drop_test.rs @@ -71,7 +71,7 @@ async fn test_engine_drop_region() { assert!(!env .get_object_store() .unwrap() - .exists(&join_path(®ion_dir, DROPPING_MARKER_FILE)) + .is_exist(&join_path(®ion_dir, DROPPING_MARKER_FILE)) .await .unwrap()); @@ -93,7 +93,7 @@ async fn test_engine_drop_region() { listener.wait().await; let object_store = env.get_object_store().unwrap(); - assert!(!object_store.exists(®ion_dir).await.unwrap()); + assert!(!object_store.is_exist(®ion_dir).await.unwrap()); } #[tokio::test] @@ -167,13 +167,13 @@ async fn test_engine_drop_region_for_custom_store() { assert!(object_store_manager .find("Gcs") .unwrap() - .exists(&custom_region_dir) + .is_exist(&custom_region_dir) .await .unwrap()); assert!(object_store_manager .find("default") .unwrap() - .exists(&global_region_dir) + .is_exist(&global_region_dir) .await .unwrap()); @@ -190,13 +190,13 @@ async fn test_engine_drop_region_for_custom_store() { assert!(!object_store_manager .find("Gcs") .unwrap() - .exists(&custom_region_dir) + .is_exist(&custom_region_dir) .await .unwrap()); assert!(object_store_manager .find("default") .unwrap() - .exists(&global_region_dir) + .is_exist(&global_region_dir) .await .unwrap()); } diff --git a/src/mito2/src/engine/open_test.rs b/src/mito2/src/engine/open_test.rs index a3b51514c287..6752bbd04b12 100644 --- a/src/mito2/src/engine/open_test.rs +++ b/src/mito2/src/engine/open_test.rs @@ -228,13 +228,13 @@ async fn test_engine_region_open_with_custom_store() { let object_store_manager = env.get_object_store_manager().unwrap(); assert!(!object_store_manager .default_object_store() - .exists(region.access_layer.region_dir()) + .is_exist(region.access_layer.region_dir()) .await .unwrap()); assert!(object_store_manager .find("Gcs") .unwrap() - .exists(region.access_layer.region_dir()) + .is_exist(region.access_layer.region_dir()) .await .unwrap()); } diff --git a/src/mito2/src/manifest/tests/checkpoint.rs b/src/mito2/src/manifest/tests/checkpoint.rs index 6f2c92bc5e09..692f40422b17 100644 --- a/src/mito2/src/manifest/tests/checkpoint.rs +++ b/src/mito2/src/manifest/tests/checkpoint.rs @@ -84,7 +84,6 @@ async fn manager_without_checkpoint() { // check files let mut expected = vec![ - "/", "00000000000000000010.json", "00000000000000000009.json", "00000000000000000008.json", @@ -131,7 +130,6 @@ async fn manager_with_checkpoint_distance_1() { // check files let mut expected = vec![ - "/", "00000000000000000009.checkpoint", "00000000000000000010.checkpoint", 
"00000000000000000010.json", diff --git a/src/mito2/src/sst/file_purger.rs b/src/mito2/src/sst/file_purger.rs index 81251c91a564..76c7a7150328 100644 --- a/src/mito2/src/sst/file_purger.rs +++ b/src/mito2/src/sst/file_purger.rs @@ -185,7 +185,7 @@ mod tests { scheduler.stop(true).await.unwrap(); - assert!(!object_store.exists(&path).await.unwrap()); + assert!(!object_store.is_exist(&path).await.unwrap()); } #[tokio::test] @@ -247,7 +247,7 @@ mod tests { scheduler.stop(true).await.unwrap(); - assert!(!object_store.exists(&path).await.unwrap()); - assert!(!object_store.exists(&index_path).await.unwrap()); + assert!(!object_store.is_exist(&path).await.unwrap()); + assert!(!object_store.is_exist(&index_path).await.unwrap()); } } diff --git a/src/mito2/src/worker/handle_open.rs b/src/mito2/src/worker/handle_open.rs index 01eaf1765224..d4a13a134597 100644 --- a/src/mito2/src/worker/handle_open.rs +++ b/src/mito2/src/worker/handle_open.rs @@ -51,7 +51,7 @@ impl RegionWorkerLoop { // Check if this region is pending drop. And clean the entire dir if so. if !self.dropping_regions.is_region_exists(region_id) && object_store - .exists(&join_path(&request.region_dir, DROPPING_MARKER_FILE)) + .is_exist(&join_path(&request.region_dir, DROPPING_MARKER_FILE)) .await .context(OpenDalSnafu)? { diff --git a/src/object-store/Cargo.toml b/src/object-store/Cargo.toml index b82be7376a72..72e0e2bfbe46 100644 --- a/src/object-store/Cargo.toml +++ b/src/object-store/Cargo.toml @@ -17,9 +17,8 @@ futures.workspace = true lazy_static.workspace = true md5 = "0.7" moka = { workspace = true, features = ["future"] } -opendal = { version = "0.50", features = [ +opendal = { version = "0.49", features = [ "layers-tracing", - "layers-prometheus", "services-azblob", "services-fs", "services-gcs", diff --git a/src/object-store/src/layers.rs b/src/object-store/src/layers.rs index 20108ab63c52..b2145aa6b0e5 100644 --- a/src/object-store/src/layers.rs +++ b/src/object-store/src/layers.rs @@ -13,37 +13,8 @@ // limitations under the License. mod lru_cache; +mod prometheus; pub use lru_cache::*; pub use opendal::layers::*; -pub use prometheus::build_prometheus_metrics_layer; - -mod prometheus { - use std::sync::{Mutex, OnceLock}; - - use opendal::layers::PrometheusLayer; - - static PROMETHEUS_LAYER: OnceLock> = OnceLock::new(); - - pub fn build_prometheus_metrics_layer(with_path_label: bool) -> PrometheusLayer { - PROMETHEUS_LAYER - .get_or_init(|| { - // This logical tries to extract parent path from the object storage operation - // the function also relies on assumption that the region path is built from - // pattern `/catalog/schema/table_id/....` - // - // We'll get the data/catalog/schema from path. 
- let path_level = if with_path_label { 3 } else { 0 }; - - let layer = PrometheusLayer::builder() - .path_label(path_level) - .register_default() - .unwrap(); - - Mutex::new(layer) - }) - .lock() - .unwrap() - .clone() - } -} +pub use prometheus::PrometheusMetricsLayer; diff --git a/src/object-store/src/layers/lru_cache/read_cache.rs b/src/object-store/src/layers/lru_cache/read_cache.rs index 874b17280d9c..f88b36784d15 100644 --- a/src/object-store/src/layers/lru_cache/read_cache.rs +++ b/src/object-store/src/layers/lru_cache/read_cache.rs @@ -156,12 +156,9 @@ impl ReadCache { let size = entry.metadata().content_length(); OBJECT_STORE_LRU_CACHE_ENTRIES.inc(); OBJECT_STORE_LRU_CACHE_BYTES.add(size as i64); - // ignore root path - if entry.path() != "/" { - self.mem_cache - .insert(read_key.to_string(), ReadResult::Success(size as u32)) - .await; - } + self.mem_cache + .insert(read_key.to_string(), ReadResult::Success(size as u32)) + .await; } Ok(self.cache_stat().await) diff --git a/src/object-store/src/layers/prometheus.rs b/src/object-store/src/layers/prometheus.rs new file mode 100644 index 000000000000..fef83a91468a --- /dev/null +++ b/src/object-store/src/layers/prometheus.rs @@ -0,0 +1,584 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +//! code originally from , make a tiny change to avoid crash in multi thread env + +use std::fmt::{Debug, Formatter}; + +use common_telemetry::debug; +use lazy_static::lazy_static; +use opendal::raw::*; +use opendal::{Buffer, ErrorKind}; +use prometheus::{ + exponential_buckets, histogram_opts, register_histogram_vec, register_int_counter_vec, + Histogram, HistogramTimer, HistogramVec, IntCounterVec, +}; + +use crate::util::extract_parent_path; + +type Result = std::result::Result; + +lazy_static! { + static ref REQUESTS_TOTAL: IntCounterVec = register_int_counter_vec!( + "opendal_requests_total", + "Total times of all kinds of operation being called", + &["scheme", "operation", "path"], + ) + .unwrap(); + static ref REQUESTS_DURATION_SECONDS: HistogramVec = register_histogram_vec!( + histogram_opts!( + "opendal_requests_duration_seconds", + "Histogram of the time spent on specific operation", + exponential_buckets(0.01, 2.0, 16).unwrap() + ), + &["scheme", "operation", "path"] + ) + .unwrap(); + static ref BYTES_TOTAL: HistogramVec = register_histogram_vec!( + histogram_opts!( + "opendal_bytes_total", + "Total size of sync or async Read/Write", + exponential_buckets(0.01, 2.0, 16).unwrap() + ), + &["scheme", "operation", "path"] + ) + .unwrap(); +} + +#[inline] +fn increment_errors_total(op: Operation, kind: ErrorKind) { + debug!( + "Prometheus statistics metrics error, operation {} error {}", + op.into_static(), + kind.into_static() + ); +} + +/// Please refer to [prometheus](https://docs.rs/prometheus) for every operation. +/// +/// # Prometheus Metrics +/// +/// In this section, we will introduce three metrics that are currently being exported by opendal. 
These metrics are essential for understanding the behavior and performance of opendal. +/// +/// +/// | Metric Name | Type | Description | Labels | +/// |-----------------------------------|-----------|------------------------------------------------------|---------------------| +/// | opendal_requests_total | Counter | Total times of all kinds of operation being called | scheme, operation | +/// | opendal_requests_duration_seconds | Histogram | Histogram of the time spent on specific operation | scheme, operation | +/// | opendal_bytes_total | Histogram | Total size of sync or async Read/Write | scheme, operation | +/// +/// For a more detailed explanation of these metrics and how they are used, please refer to the [Prometheus documentation](https://prometheus.io/docs/introduction/overview/). +/// +/// # Histogram Configuration +/// +/// The metric buckets for these histograms are automatically generated based on the `exponential_buckets(0.01, 2.0, 16)` configuration. +#[derive(Default, Debug, Clone)] +pub struct PrometheusMetricsLayer { + pub path_label: bool, +} + +impl PrometheusMetricsLayer { + pub fn new(path_label: bool) -> Self { + Self { path_label } + } +} + +impl Layer for PrometheusMetricsLayer { + type LayeredAccess = PrometheusAccess; + + fn layer(&self, inner: A) -> Self::LayeredAccess { + let meta = inner.info(); + let scheme = meta.scheme(); + + PrometheusAccess { + inner, + scheme: scheme.to_string(), + path_label: self.path_label, + } + } +} + +#[derive(Clone)] +pub struct PrometheusAccess { + inner: A, + scheme: String, + path_label: bool, +} + +impl PrometheusAccess { + fn get_path_label<'a>(&self, path: &'a str) -> &'a str { + if self.path_label { + extract_parent_path(path) + } else { + "" + } + } +} + +impl Debug for PrometheusAccess { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + f.debug_struct("PrometheusAccessor") + .field("inner", &self.inner) + .finish_non_exhaustive() + } +} + +impl LayeredAccess for PrometheusAccess { + type Inner = A; + type Reader = PrometheusMetricWrapper; + type BlockingReader = PrometheusMetricWrapper; + type Writer = PrometheusMetricWrapper; + type BlockingWriter = PrometheusMetricWrapper; + type Lister = A::Lister; + type BlockingLister = A::BlockingLister; + + fn inner(&self) -> &Self::Inner { + &self.inner + } + + async fn create_dir(&self, path: &str, args: OpCreateDir) -> Result { + let path_label = self.get_path_label(path); + REQUESTS_TOTAL + .with_label_values(&[&self.scheme, Operation::CreateDir.into_static(), path_label]) + .inc(); + + let timer = REQUESTS_DURATION_SECONDS + .with_label_values(&[&self.scheme, Operation::CreateDir.into_static(), path_label]) + .start_timer(); + let create_res = self.inner.create_dir(path, args).await; + + timer.observe_duration(); + create_res.inspect_err(|e| { + increment_errors_total(Operation::CreateDir, e.kind()); + }) + } + + async fn read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::Reader)> { + let path_label = self.get_path_label(path); + REQUESTS_TOTAL + .with_label_values(&[&self.scheme, Operation::Read.into_static(), path_label]) + .inc(); + + let timer = REQUESTS_DURATION_SECONDS + .with_label_values(&[&self.scheme, Operation::Read.into_static(), path_label]) + .start_timer(); + + let (rp, r) = self.inner.read(path, args).await.inspect_err(|e| { + increment_errors_total(Operation::Read, e.kind()); + })?; + + Ok(( + rp, + PrometheusMetricWrapper::new( + r, + Operation::Read, + BYTES_TOTAL.with_label_values(&[ + &self.scheme, + 
Operation::Read.into_static(), + path_label, + ]), + timer, + ), + )) + } + + async fn write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::Writer)> { + let path_label = self.get_path_label(path); + REQUESTS_TOTAL + .with_label_values(&[&self.scheme, Operation::Write.into_static(), path_label]) + .inc(); + + let timer = REQUESTS_DURATION_SECONDS + .with_label_values(&[&self.scheme, Operation::Write.into_static(), path_label]) + .start_timer(); + + let (rp, r) = self.inner.write(path, args).await.inspect_err(|e| { + increment_errors_total(Operation::Write, e.kind()); + })?; + + Ok(( + rp, + PrometheusMetricWrapper::new( + r, + Operation::Write, + BYTES_TOTAL.with_label_values(&[ + &self.scheme, + Operation::Write.into_static(), + path_label, + ]), + timer, + ), + )) + } + + async fn stat(&self, path: &str, args: OpStat) -> Result { + let path_label = self.get_path_label(path); + REQUESTS_TOTAL + .with_label_values(&[&self.scheme, Operation::Stat.into_static(), path_label]) + .inc(); + let timer = REQUESTS_DURATION_SECONDS + .with_label_values(&[&self.scheme, Operation::Stat.into_static(), path_label]) + .start_timer(); + + let stat_res = self.inner.stat(path, args).await; + timer.observe_duration(); + stat_res.inspect_err(|e| { + increment_errors_total(Operation::Stat, e.kind()); + }) + } + + async fn delete(&self, path: &str, args: OpDelete) -> Result { + let path_label = self.get_path_label(path); + REQUESTS_TOTAL + .with_label_values(&[&self.scheme, Operation::Delete.into_static(), path_label]) + .inc(); + + let timer = REQUESTS_DURATION_SECONDS + .with_label_values(&[&self.scheme, Operation::Delete.into_static(), path_label]) + .start_timer(); + + let delete_res = self.inner.delete(path, args).await; + timer.observe_duration(); + delete_res.inspect_err(|e| { + increment_errors_total(Operation::Delete, e.kind()); + }) + } + + async fn list(&self, path: &str, args: OpList) -> Result<(RpList, Self::Lister)> { + let path_label = self.get_path_label(path); + REQUESTS_TOTAL + .with_label_values(&[&self.scheme, Operation::List.into_static(), path_label]) + .inc(); + + let timer = REQUESTS_DURATION_SECONDS + .with_label_values(&[&self.scheme, Operation::List.into_static(), path_label]) + .start_timer(); + + let list_res = self.inner.list(path, args).await; + + timer.observe_duration(); + list_res.inspect_err(|e| { + increment_errors_total(Operation::List, e.kind()); + }) + } + + async fn batch(&self, args: OpBatch) -> Result { + REQUESTS_TOTAL + .with_label_values(&[&self.scheme, Operation::Batch.into_static(), ""]) + .inc(); + + let timer = REQUESTS_DURATION_SECONDS + .with_label_values(&[&self.scheme, Operation::Batch.into_static(), ""]) + .start_timer(); + let result = self.inner.batch(args).await; + + timer.observe_duration(); + result.inspect_err(|e| { + increment_errors_total(Operation::Batch, e.kind()); + }) + } + + async fn presign(&self, path: &str, args: OpPresign) -> Result { + let path_label = self.get_path_label(path); + REQUESTS_TOTAL + .with_label_values(&[&self.scheme, Operation::Presign.into_static(), path_label]) + .inc(); + + let timer = REQUESTS_DURATION_SECONDS + .with_label_values(&[&self.scheme, Operation::Presign.into_static(), path_label]) + .start_timer(); + let result = self.inner.presign(path, args).await; + timer.observe_duration(); + + result.inspect_err(|e| { + increment_errors_total(Operation::Presign, e.kind()); + }) + } + + fn blocking_create_dir(&self, path: &str, args: OpCreateDir) -> Result { + let path_label = self.get_path_label(path); + 
REQUESTS_TOTAL + .with_label_values(&[ + &self.scheme, + Operation::BlockingCreateDir.into_static(), + path_label, + ]) + .inc(); + + let timer = REQUESTS_DURATION_SECONDS + .with_label_values(&[ + &self.scheme, + Operation::BlockingCreateDir.into_static(), + path_label, + ]) + .start_timer(); + let result = self.inner.blocking_create_dir(path, args); + + timer.observe_duration(); + + result.inspect_err(|e| { + increment_errors_total(Operation::BlockingCreateDir, e.kind()); + }) + } + + fn blocking_read(&self, path: &str, args: OpRead) -> Result<(RpRead, Self::BlockingReader)> { + let path_label = self.get_path_label(path); + REQUESTS_TOTAL + .with_label_values(&[ + &self.scheme, + Operation::BlockingRead.into_static(), + path_label, + ]) + .inc(); + + let timer = REQUESTS_DURATION_SECONDS + .with_label_values(&[ + &self.scheme, + Operation::BlockingRead.into_static(), + path_label, + ]) + .start_timer(); + + self.inner + .blocking_read(path, args) + .map(|(rp, r)| { + ( + rp, + PrometheusMetricWrapper::new( + r, + Operation::BlockingRead, + BYTES_TOTAL.with_label_values(&[ + &self.scheme, + Operation::BlockingRead.into_static(), + path_label, + ]), + timer, + ), + ) + }) + .inspect_err(|e| { + increment_errors_total(Operation::BlockingRead, e.kind()); + }) + } + + fn blocking_write(&self, path: &str, args: OpWrite) -> Result<(RpWrite, Self::BlockingWriter)> { + let path_label = self.get_path_label(path); + REQUESTS_TOTAL + .with_label_values(&[ + &self.scheme, + Operation::BlockingWrite.into_static(), + path_label, + ]) + .inc(); + + let timer = REQUESTS_DURATION_SECONDS + .with_label_values(&[ + &self.scheme, + Operation::BlockingWrite.into_static(), + path_label, + ]) + .start_timer(); + + self.inner + .blocking_write(path, args) + .map(|(rp, r)| { + ( + rp, + PrometheusMetricWrapper::new( + r, + Operation::BlockingWrite, + BYTES_TOTAL.with_label_values(&[ + &self.scheme, + Operation::BlockingWrite.into_static(), + path_label, + ]), + timer, + ), + ) + }) + .inspect_err(|e| { + increment_errors_total(Operation::BlockingWrite, e.kind()); + }) + } + + fn blocking_stat(&self, path: &str, args: OpStat) -> Result { + let path_label = self.get_path_label(path); + REQUESTS_TOTAL + .with_label_values(&[ + &self.scheme, + Operation::BlockingStat.into_static(), + path_label, + ]) + .inc(); + + let timer = REQUESTS_DURATION_SECONDS + .with_label_values(&[ + &self.scheme, + Operation::BlockingStat.into_static(), + path_label, + ]) + .start_timer(); + let result = self.inner.blocking_stat(path, args); + timer.observe_duration(); + result.inspect_err(|e| { + increment_errors_total(Operation::BlockingStat, e.kind()); + }) + } + + fn blocking_delete(&self, path: &str, args: OpDelete) -> Result { + let path_label = self.get_path_label(path); + REQUESTS_TOTAL + .with_label_values(&[ + &self.scheme, + Operation::BlockingDelete.into_static(), + path_label, + ]) + .inc(); + + let timer = REQUESTS_DURATION_SECONDS + .with_label_values(&[ + &self.scheme, + Operation::BlockingDelete.into_static(), + path_label, + ]) + .start_timer(); + let result = self.inner.blocking_delete(path, args); + timer.observe_duration(); + + result.inspect_err(|e| { + increment_errors_total(Operation::BlockingDelete, e.kind()); + }) + } + + fn blocking_list(&self, path: &str, args: OpList) -> Result<(RpList, Self::BlockingLister)> { + let path_label = self.get_path_label(path); + REQUESTS_TOTAL + .with_label_values(&[ + &self.scheme, + Operation::BlockingList.into_static(), + path_label, + ]) + .inc(); + + let timer = 
REQUESTS_DURATION_SECONDS + .with_label_values(&[ + &self.scheme, + Operation::BlockingList.into_static(), + path_label, + ]) + .start_timer(); + let result = self.inner.blocking_list(path, args); + timer.observe_duration(); + + result.inspect_err(|e| { + increment_errors_total(Operation::BlockingList, e.kind()); + }) + } +} + +pub struct PrometheusMetricWrapper { + inner: R, + + op: Operation, + bytes_counter: Histogram, + _requests_duration_timer: HistogramTimer, + bytes: u64, +} + +impl Drop for PrometheusMetricWrapper { + fn drop(&mut self) { + self.bytes_counter.observe(self.bytes as f64); + } +} + +impl PrometheusMetricWrapper { + fn new( + inner: R, + op: Operation, + bytes_counter: Histogram, + requests_duration_timer: HistogramTimer, + ) -> Self { + Self { + inner, + op, + bytes_counter, + _requests_duration_timer: requests_duration_timer, + bytes: 0, + } + } +} + +impl oio::Read for PrometheusMetricWrapper { + async fn read(&mut self) -> Result { + self.inner.read().await.inspect_err(|err| { + increment_errors_total(self.op, err.kind()); + }) + } +} + +impl oio::BlockingRead for PrometheusMetricWrapper { + fn read(&mut self) -> opendal::Result { + self.inner.read().inspect_err(|err| { + increment_errors_total(self.op, err.kind()); + }) + } +} + +impl oio::Write for PrometheusMetricWrapper { + async fn write(&mut self, bs: Buffer) -> Result<()> { + let bytes = bs.len(); + match self.inner.write(bs).await { + Ok(_) => { + self.bytes += bytes as u64; + Ok(()) + } + Err(err) => { + increment_errors_total(self.op, err.kind()); + Err(err) + } + } + } + + async fn close(&mut self) -> Result<()> { + self.inner.close().await.inspect_err(|err| { + increment_errors_total(self.op, err.kind()); + }) + } + + async fn abort(&mut self) -> Result<()> { + self.inner.close().await.inspect_err(|err| { + increment_errors_total(self.op, err.kind()); + }) + } +} + +impl oio::BlockingWrite for PrometheusMetricWrapper { + fn write(&mut self, bs: Buffer) -> Result<()> { + let bytes = bs.len(); + self.inner + .write(bs) + .map(|_| { + self.bytes += bytes as u64; + }) + .inspect_err(|err| { + increment_errors_total(self.op, err.kind()); + }) + } + + fn close(&mut self) -> Result<()> { + self.inner.close().inspect_err(|err| { + increment_errors_total(self.op, err.kind()); + }) + } +} diff --git a/src/object-store/src/util.rs b/src/object-store/src/util.rs index 271da33e853c..fc0a031ab953 100644 --- a/src/object-store/src/util.rs +++ b/src/object-store/src/util.rs @@ -15,12 +15,19 @@ use std::fmt::Display; use common_telemetry::{debug, error, trace}; +use futures::TryStreamExt; use opendal::layers::{LoggingInterceptor, LoggingLayer, TracingLayer}; use opendal::raw::{AccessorInfo, Operation}; -use opendal::ErrorKind; +use opendal::{Entry, ErrorKind, Lister}; +use crate::layers::PrometheusMetricsLayer; use crate::ObjectStore; +/// Collect all entries from the [Lister]. +pub async fn collect(stream: Lister) -> Result, opendal::Error> { + stream.try_collect::>().await +} + /// Join two paths and normalize the output dir. /// /// The output dir is always ends with `/`. e.g. 
@@ -120,12 +127,26 @@ pub fn normalize_path(path: &str) -> String { p } +// This logical tries to extract parent path from the object storage operation +// the function also relies on assumption that the region path is built from +// pattern `/catalog/schema/table_id/....` +// +// this implementation tries to extract at most 3 levels of parent path +pub(crate) fn extract_parent_path(path: &str) -> &str { + // split the path into `catalog`, `schema` and others + path.char_indices() + .filter(|&(_, c)| c == '/') + // we get the data/catalog/schema from path, split at the 3rd / + .nth(2) + .map_or(path, |(i, _)| &path[..i]) +} + /// Attaches instrument layers to the object store. pub fn with_instrument_layers(object_store: ObjectStore, path_label: bool) -> ObjectStore { object_store .layer(LoggingLayer::new(DefaultLoggingInterceptor)) .layer(TracingLayer) - .layer(crate::layers::build_prometheus_metrics_layer(path_label)) + .layer(PrometheusMetricsLayer::new(path_label)) } static LOGGING_TARGET: &str = "opendal::services"; @@ -242,4 +263,28 @@ mod tests { assert_eq!("/abc", join_path("//", "/abc")); assert_eq!("abc/def", join_path("abc/", "//def")); } + + #[test] + fn test_path_extraction() { + assert_eq!( + "data/greptime/public", + extract_parent_path("data/greptime/public/1024/1024_0000000000/") + ); + + assert_eq!( + "data/greptime/public", + extract_parent_path("data/greptime/public/1/") + ); + + assert_eq!( + "data/greptime/public", + extract_parent_path("data/greptime/public") + ); + + assert_eq!("data/greptime/", extract_parent_path("data/greptime/")); + + assert_eq!("data/", extract_parent_path("data/")); + + assert_eq!("/", extract_parent_path("/")); + } } diff --git a/src/object-store/tests/object_store_test.rs b/src/object-store/tests/object_store_test.rs index 7e81b965fbed..497decffabfc 100644 --- a/src/object-store/tests/object_store_test.rs +++ b/src/object-store/tests/object_store_test.rs @@ -65,38 +65,23 @@ async fn test_object_list(store: &ObjectStore) -> Result<()> { store.write(p3, "Hello, object3!").await?; // List objects - let entries = store - .list("/") - .await? - .into_iter() - .filter(|x| x.metadata().mode() == EntryMode::FILE) - .collect::>(); + let entries = store.list("/").await?; assert_eq!(3, entries.len()); store.delete(p1).await?; store.delete(p3).await?; // List objects again - // Only o2 and root exist - let entries = store - .list("/") - .await? - .into_iter() - .filter(|x| x.metadata().mode() == EntryMode::FILE) - .collect::>(); + // Only o2 is exists + let entries = store.list("/").await?; assert_eq!(1, entries.len()); - assert_eq!(p2, entries[0].path()); + assert_eq!(p2, entries.first().unwrap().path()); let content = store.read(p2).await?; assert_eq!("Hello, object2!", String::from_utf8(content.to_vec())?); store.delete(p2).await?; - let entries = store - .list("/") - .await? 
- .into_iter() - .filter(|x| x.metadata().mode() == EntryMode::FILE) - .collect::>(); + let entries = store.list("/").await?; assert!(entries.is_empty()); assert!(store.read(p1).await.is_err()); @@ -267,7 +252,7 @@ async fn test_file_backend_with_lru_cache() -> Result<()> { async fn assert_lru_cache(cache_layer: &LruCacheLayer, file_names: &[&str]) { for file_name in file_names { - assert!(cache_layer.contains_file(file_name).await, "{file_name}"); + assert!(cache_layer.contains_file(file_name).await); } } @@ -279,9 +264,7 @@ async fn assert_cache_files( let (_, mut lister) = store.list("/", OpList::default()).await?; let mut objects = vec![]; while let Some(e) = lister.next().await? { - if e.mode() == EntryMode::FILE { - objects.push(e); - } + objects.push(e); } // compare the cache file with the expected cache file; ignore orders @@ -349,9 +332,9 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_cache_files( &cache_store, &[ - "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-", - "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=7-", - "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=0-", + "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14", + "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=7-14", + "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=0-14", ], &["Hello, object1!", "object2!", "Hello, object2!"], ) @@ -359,9 +342,9 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_lru_cache( &cache_layer, &[ - "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-", - "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=7-", - "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=0-", + "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14", + "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=7-14", + "ecfe0dce85de452eb0a325158e7bfb75.cache-bytes=0-14", ], ) .await; @@ -372,13 +355,13 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_eq!(cache_layer.read_cache_stat().await, (1, 15)); assert_cache_files( &cache_store, - &["6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-"], + &["6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14"], &["Hello, object1!"], ) .await?; assert_lru_cache( &cache_layer, - &["6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-"], + &["6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14"], ) .await; @@ -405,8 +388,8 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_cache_files( &cache_store, &[ - "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", + "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], &["Hello, object1!", "Hello, object3!", "Hello"], @@ -415,8 +398,8 @@ async fn test_object_store_cache_policy() -> Result<()> { assert_lru_cache( &cache_layer, &[ - "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", + "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=0-14", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], ) @@ -433,7 +416,7 @@ async fn test_object_store_cache_policy() -> Result<()> { &cache_store, &[ "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=1-14", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], &["ello, object1!", "Hello, object3!", "Hello"], @@ -443,7 +426,7 @@ async fn test_object_store_cache_policy() -> Result<()> { &cache_layer, &[ 
"6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=1-14", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], ) @@ -465,7 +448,7 @@ async fn test_object_store_cache_policy() -> Result<()> { &cache_layer, &[ "6d29752bdc6e4d5ba5483b96615d6c48.cache-bytes=1-14", - "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-", + "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-14", "a8b1dc21e24bb55974e3e68acc77ed52.cache-bytes=0-4", ], ) From 854a44172d8688c728971c13b8e40e61f725442e Mon Sep 17 00:00:00 2001 From: evenyag Date: Fri, 20 Dec 2024 12:53:00 +0800 Subject: [PATCH 59/59] chore: set version to 0.11.1 --- .github/workflows/release.yml | 2 +- Cargo.lock | 144 +++++++++++++++++----------------- Cargo.toml | 2 +- 3 files changed, 74 insertions(+), 74 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 3f46ef1a7bda..4f32298a8ba2 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -91,7 +91,7 @@ env: # The scheduled version is '${{ env.NEXT_RELEASE_VERSION }}-nightly-YYYYMMDD', like v0.2.0-nigthly-20230313; NIGHTLY_RELEASE_PREFIX: nightly # Note: The NEXT_RELEASE_VERSION should be modified manually by every formal release. - NEXT_RELEASE_VERSION: v0.12.0 + NEXT_RELEASE_VERSION: v0.11.0 # Permission reference: https://docs.github.com/en/actions/using-jobs/assigning-permissions-to-jobs permissions: diff --git a/Cargo.lock b/Cargo.lock index 7f38d0d8b183..fcda44efaf87 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -188,7 +188,7 @@ checksum = "d301b3b94cb4b2f23d7917810addbbaff90738e0ca2be692bd027e70d7e0330c" [[package]] name = "api" -version = "0.12.0" +version = "0.11.1" dependencies = [ "common-base", "common-decimal", @@ -773,7 +773,7 @@ dependencies = [ [[package]] name = "auth" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "async-trait", @@ -1326,7 +1326,7 @@ dependencies = [ [[package]] name = "cache" -version = "0.12.0" +version = "0.11.1" dependencies = [ "catalog", "common-error", @@ -1360,7 +1360,7 @@ checksum = "37b2a672a2cb129a2e41c10b1224bb368f9f37a2b16b612598138befd7b37eb5" [[package]] name = "catalog" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "arrow", @@ -1696,7 +1696,7 @@ checksum = "1462739cb27611015575c0c11df5df7601141071f07518d56fcc1be504cbec97" [[package]] name = "cli" -version = "0.12.0" +version = "0.11.1" dependencies = [ "async-trait", "auth", @@ -1739,7 +1739,7 @@ dependencies = [ "session", "snafu 0.8.5", "store-api", - "substrait 0.12.0", + "substrait 0.11.1", "table", "tempfile", "tokio", @@ -1748,7 +1748,7 @@ dependencies = [ [[package]] name = "client" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "arc-swap", @@ -1775,7 +1775,7 @@ dependencies = [ "rand", "serde_json", "snafu 0.8.5", - "substrait 0.12.0", + "substrait 0.11.1", "substrait 0.37.3", "tokio", "tokio-stream", @@ -1816,7 +1816,7 @@ dependencies = [ [[package]] name = "cmd" -version = "0.12.0" +version = "0.11.1" dependencies = [ "async-trait", "auth", @@ -1876,7 +1876,7 @@ dependencies = [ "similar-asserts", "snafu 0.8.5", "store-api", - "substrait 0.12.0", + "substrait 0.11.1", "table", "temp-env", "tempfile", @@ -1928,7 +1928,7 @@ checksum = "55b672471b4e9f9e95499ea597ff64941a309b2cdbffcc46f2cc5e2d971fd335" [[package]] name = "common-base" -version = "0.12.0" +version = "0.11.1" dependencies = [ "anymap2", "async-trait", @@ -1950,11 +1950,11 @@ dependencies = [ [[package]] name = 
"common-catalog" -version = "0.12.0" +version = "0.11.1" [[package]] name = "common-config" -version = "0.12.0" +version = "0.11.1" dependencies = [ "common-base", "common-error", @@ -1977,7 +1977,7 @@ dependencies = [ [[package]] name = "common-datasource" -version = "0.12.0" +version = "0.11.1" dependencies = [ "arrow", "arrow-schema", @@ -2013,7 +2013,7 @@ dependencies = [ [[package]] name = "common-decimal" -version = "0.12.0" +version = "0.11.1" dependencies = [ "bigdecimal 0.4.5", "common-error", @@ -2026,7 +2026,7 @@ dependencies = [ [[package]] name = "common-error" -version = "0.12.0" +version = "0.11.1" dependencies = [ "snafu 0.8.5", "strum 0.25.0", @@ -2035,7 +2035,7 @@ dependencies = [ [[package]] name = "common-frontend" -version = "0.12.0" +version = "0.11.1" dependencies = [ "async-trait", "common-error", @@ -2045,7 +2045,7 @@ dependencies = [ [[package]] name = "common-function" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "approx 0.5.1", @@ -2089,7 +2089,7 @@ dependencies = [ [[package]] name = "common-greptimedb-telemetry" -version = "0.12.0" +version = "0.11.1" dependencies = [ "async-trait", "common-runtime", @@ -2106,7 +2106,7 @@ dependencies = [ [[package]] name = "common-grpc" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "arrow-flight", @@ -2132,7 +2132,7 @@ dependencies = [ [[package]] name = "common-grpc-expr" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "common-base", @@ -2151,7 +2151,7 @@ dependencies = [ [[package]] name = "common-macro" -version = "0.12.0" +version = "0.11.1" dependencies = [ "arc-swap", "common-query", @@ -2165,7 +2165,7 @@ dependencies = [ [[package]] name = "common-mem-prof" -version = "0.12.0" +version = "0.11.1" dependencies = [ "common-error", "common-macro", @@ -2178,7 +2178,7 @@ dependencies = [ [[package]] name = "common-meta" -version = "0.12.0" +version = "0.11.1" dependencies = [ "anymap2", "api", @@ -2235,7 +2235,7 @@ dependencies = [ [[package]] name = "common-options" -version = "0.12.0" +version = "0.11.1" dependencies = [ "common-grpc", "humantime-serde", @@ -2244,11 +2244,11 @@ dependencies = [ [[package]] name = "common-plugins" -version = "0.12.0" +version = "0.11.1" [[package]] name = "common-pprof" -version = "0.12.0" +version = "0.11.1" dependencies = [ "common-error", "common-macro", @@ -2260,7 +2260,7 @@ dependencies = [ [[package]] name = "common-procedure" -version = "0.12.0" +version = "0.11.1" dependencies = [ "async-stream", "async-trait", @@ -2287,7 +2287,7 @@ dependencies = [ [[package]] name = "common-procedure-test" -version = "0.12.0" +version = "0.11.1" dependencies = [ "async-trait", "common-procedure", @@ -2295,7 +2295,7 @@ dependencies = [ [[package]] name = "common-query" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "async-trait", @@ -2321,7 +2321,7 @@ dependencies = [ [[package]] name = "common-recordbatch" -version = "0.12.0" +version = "0.11.1" dependencies = [ "arc-swap", "common-error", @@ -2340,7 +2340,7 @@ dependencies = [ [[package]] name = "common-runtime" -version = "0.12.0" +version = "0.11.1" dependencies = [ "async-trait", "clap 4.5.19", @@ -2368,7 +2368,7 @@ dependencies = [ [[package]] name = "common-telemetry" -version = "0.12.0" +version = "0.11.1" dependencies = [ "atty", "backtrace", @@ -2396,7 +2396,7 @@ dependencies = [ [[package]] name = "common-test-util" -version = "0.12.0" +version = "0.11.1" dependencies = [ "client", "common-query", @@ -2408,7 +2408,7 @@ dependencies = [ [[package]] name = "common-time" 
-version = "0.12.0" +version = "0.11.1" dependencies = [ "arrow", "chrono", @@ -2426,7 +2426,7 @@ dependencies = [ [[package]] name = "common-version" -version = "0.12.0" +version = "0.11.1" dependencies = [ "build-data", "const_format", @@ -2436,7 +2436,7 @@ dependencies = [ [[package]] name = "common-wal" -version = "0.12.0" +version = "0.11.1" dependencies = [ "common-base", "common-error", @@ -3235,7 +3235,7 @@ dependencies = [ [[package]] name = "datanode" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "arrow-flight", @@ -3286,7 +3286,7 @@ dependencies = [ "session", "snafu 0.8.5", "store-api", - "substrait 0.12.0", + "substrait 0.11.1", "table", "tokio", "toml 0.8.19", @@ -3295,7 +3295,7 @@ dependencies = [ [[package]] name = "datatypes" -version = "0.12.0" +version = "0.11.1" dependencies = [ "arrow", "arrow-array", @@ -3919,7 +3919,7 @@ dependencies = [ [[package]] name = "file-engine" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "async-trait", @@ -4035,7 +4035,7 @@ checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8" [[package]] name = "flow" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "arrow", @@ -4093,7 +4093,7 @@ dependencies = [ "snafu 0.8.5", "store-api", "strum 0.25.0", - "substrait 0.12.0", + "substrait 0.11.1", "table", "tokio", "tonic 0.11.0", @@ -4131,7 +4131,7 @@ checksum = "6c2141d6d6c8512188a7891b4b01590a45f6dac67afb4f255c4124dbb86d4eaa" [[package]] name = "frontend" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "arc-swap", @@ -5280,7 +5280,7 @@ dependencies = [ [[package]] name = "index" -version = "0.12.0" +version = "0.11.1" dependencies = [ "async-trait", "asynchronous-codec", @@ -6129,7 +6129,7 @@ checksum = "a7a70ba024b9dc04c27ea2f0c0548feb474ec5c54bba33a7f72f873a39d07b24" [[package]] name = "log-query" -version = "0.12.0" +version = "0.11.1" dependencies = [ "chrono", "common-error", @@ -6140,7 +6140,7 @@ dependencies = [ [[package]] name = "log-store" -version = "0.12.0" +version = "0.11.1" dependencies = [ "async-stream", "async-trait", @@ -6484,7 +6484,7 @@ dependencies = [ [[package]] name = "meta-client" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "async-trait", @@ -6511,7 +6511,7 @@ dependencies = [ [[package]] name = "meta-srv" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "async-trait", @@ -6590,7 +6590,7 @@ dependencies = [ [[package]] name = "metric-engine" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "aquamarine", @@ -6684,7 +6684,7 @@ dependencies = [ [[package]] name = "mito2" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "aquamarine", @@ -7421,7 +7421,7 @@ dependencies = [ [[package]] name = "object-store" -version = "0.12.0" +version = "0.11.1" dependencies = [ "anyhow", "bytes", @@ -7674,7 +7674,7 @@ dependencies = [ [[package]] name = "operator" -version = "0.12.0" +version = "0.11.1" dependencies = [ "ahash 0.8.11", "api", @@ -7722,7 +7722,7 @@ dependencies = [ "sql", "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", "store-api", - "substrait 0.12.0", + "substrait 0.11.1", "table", "tokio", "tokio-util", @@ -7972,7 +7972,7 @@ dependencies = [ [[package]] name = "partition" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "async-trait", @@ -8258,7 +8258,7 @@ checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" [[package]] name = "pipeline" -version = "0.12.0" 
+version = "0.11.1" dependencies = [ "ahash 0.8.11", "api", @@ -8420,7 +8420,7 @@ dependencies = [ [[package]] name = "plugins" -version = "0.12.0" +version = "0.11.1" dependencies = [ "auth", "clap 4.5.19", @@ -8708,7 +8708,7 @@ dependencies = [ [[package]] name = "promql" -version = "0.12.0" +version = "0.11.1" dependencies = [ "ahash 0.8.11", "async-trait", @@ -8943,7 +8943,7 @@ dependencies = [ [[package]] name = "puffin" -version = "0.12.0" +version = "0.11.1" dependencies = [ "async-compression 0.4.13", "async-trait", @@ -9068,7 +9068,7 @@ dependencies = [ [[package]] name = "query" -version = "0.12.0" +version = "0.11.1" dependencies = [ "ahash 0.8.11", "api", @@ -9131,7 +9131,7 @@ dependencies = [ "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", "statrs", "store-api", - "substrait 0.12.0", + "substrait 0.11.1", "table", "tokio", "tokio-stream", @@ -10615,7 +10615,7 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "script" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "arc-swap", @@ -10907,7 +10907,7 @@ dependencies = [ [[package]] name = "servers" -version = "0.12.0" +version = "0.11.1" dependencies = [ "ahash 0.8.11", "api", @@ -11018,7 +11018,7 @@ dependencies = [ [[package]] name = "session" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "arc-swap", @@ -11372,7 +11372,7 @@ dependencies = [ [[package]] name = "sql" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "chrono", @@ -11436,7 +11436,7 @@ dependencies = [ [[package]] name = "sqlness-runner" -version = "0.12.0" +version = "0.11.1" dependencies = [ "async-trait", "clap 4.5.19", @@ -11654,7 +11654,7 @@ dependencies = [ [[package]] name = "store-api" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "aquamarine", @@ -11816,7 +11816,7 @@ dependencies = [ [[package]] name = "substrait" -version = "0.12.0" +version = "0.11.1" dependencies = [ "async-trait", "bytes", @@ -12015,7 +12015,7 @@ dependencies = [ [[package]] name = "table" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "async-trait", @@ -12292,7 +12292,7 @@ checksum = "3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" [[package]] name = "tests-fuzz" -version = "0.12.0" +version = "0.11.1" dependencies = [ "arbitrary", "async-trait", @@ -12335,7 +12335,7 @@ dependencies = [ [[package]] name = "tests-integration" -version = "0.12.0" +version = "0.11.1" dependencies = [ "api", "arrow-flight", @@ -12399,7 +12399,7 @@ dependencies = [ "sql", "sqlx", "store-api", - "substrait 0.12.0", + "substrait 0.11.1", "table", "tempfile", "time", diff --git a/Cargo.toml b/Cargo.toml index 990bc71a907b..7ab000c6bcdd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -68,7 +68,7 @@ members = [ resolver = "2" [workspace.package] -version = "0.12.0" +version = "0.11.1" edition = "2021" license = "Apache-2.0"