From a218f12bd98dce80d9c78643dee3ce6e733b1629 Mon Sep 17 00:00:00 2001 From: Weny Xu Date: Thu, 7 Mar 2024 14:51:19 +0800 Subject: [PATCH] test: add fuzz test for create table (#3441) * feat: add create table fuzz test * chore: add ci cfg for fuzz tests * refactor: remove redundant nightly config * chore: run fuzz test in debug mode * chore: use ubuntu-latest * fix: close connection * chore: add cache in fuzz test ci * chore: apply suggestion from CR * chore: apply suggestion from CR * chore: refactor the fuzz test action --- .env.example | 3 + .github/actions/fuzz-test/action.yaml | 13 +++ .github/workflows/develop.yml | 40 +++++++++ .gitignore | 4 + Cargo.lock | 34 ++++++++ tests-fuzz/Cargo.toml | 20 +++++ tests-fuzz/README.md | 41 +++++++++ tests-fuzz/src/error.rs | 8 ++ tests-fuzz/src/generator.rs | 10 ++- tests-fuzz/src/generator/alter_expr.rs | 8 +- tests-fuzz/src/generator/create_expr.rs | 2 +- tests-fuzz/src/lib.rs | 1 + tests-fuzz/src/utils.rs | 42 +++++++++ tests-fuzz/targets/fuzz_create_table.rs | 108 ++++++++++++++++++++++++ 14 files changed, 328 insertions(+), 6 deletions(-) create mode 100644 .github/actions/fuzz-test/action.yaml create mode 100644 tests-fuzz/README.md create mode 100644 tests-fuzz/src/utils.rs create mode 100644 tests-fuzz/targets/fuzz_create_table.rs diff --git a/.env.example b/.env.example index 2f51a7cc6559..369ebb8e2f43 100644 --- a/.env.example +++ b/.env.example @@ -21,3 +21,6 @@ GT_GCS_CREDENTIAL_PATH = GCS credential path GT_GCS_ENDPOINT = GCS end point # Settings for kafka wal test GT_KAFKA_ENDPOINTS = localhost:9092 + +# Setting for fuzz tests +GT_MYSQL_ADDR = localhost:4002 diff --git a/.github/actions/fuzz-test/action.yaml b/.github/actions/fuzz-test/action.yaml new file mode 100644 index 000000000000..d50d5be6ef26 --- /dev/null +++ b/.github/actions/fuzz-test/action.yaml @@ -0,0 +1,13 @@ +name: Fuzz Test +description: 'Fuzz test given setup and service' +inputs: + target: + description: "The fuzz target to test" +runs: + 
using: composite + steps: + - name: Run Fuzz Test + shell: bash + run: cargo fuzz run ${{ inputs.target }} --fuzz-dir tests-fuzz -D -s none -- -max_total_time=120 + env: + GT_MYSQL_ADDR: 127.0.0.1:4002 diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index 3b6975a14919..94ada0fabf73 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -117,6 +117,46 @@ jobs: artifacts-dir: bins version: current + fuzztest: + name: Fuzz Test + needs: build + runs-on: ubuntu-latest + strategy: + matrix: + target: [ "fuzz_create_table" ] + steps: + - uses: actions/checkout@v4 + - uses: arduino/setup-protoc@v3 + - uses: dtolnay/rust-toolchain@master + with: + toolchain: ${{ env.RUST_TOOLCHAIN }} + - name: Rust Cache + uses: Swatinem/rust-cache@v2 + with: + # Shares across multiple jobs + shared-key: "fuzz-test-targets" + - name: Set Rust Fuzz + shell: bash + run: | + sudo apt update && sudo apt install -y libfuzzer-14-dev + cargo install cargo-fuzz + - name: Download pre-built binaries + uses: actions/download-artifact@v4 + with: + name: bins + path: . 
+ - name: Unzip binaries + run: tar -xvf ./bins.tar.gz + - name: Run GreptimeDB + run: | + ./bins/greptime standalone start& + - name: Fuzz Test + uses: ./.github/actions/fuzz-test + env: + CUSTOM_LIBFUZZER_PATH: /usr/lib/llvm-14/lib/libFuzzer.a + with: + target: ${{ matrix.target }} + sqlness: name: Sqlness Test needs: build diff --git a/.gitignore b/.gitignore index 4db155f85ff3..c1b0a8961845 100644 --- a/.gitignore +++ b/.gitignore @@ -46,3 +46,7 @@ benchmarks/data *.code-workspace venv/ + +# Fuzz tests +tests-fuzz/artifacts/ +tests-fuzz/corpus/ diff --git a/Cargo.lock b/Cargo.lock index 8d61b37c6c57..c2e8b8b9f29e 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -252,6 +252,15 @@ dependencies = [ "syn 1.0.109", ] +[[package]] +name = "arbitrary" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110" +dependencies = [ + "derive_arbitrary", +] + [[package]] name = "arc-swap" version = "1.6.0" @@ -2951,6 +2960,17 @@ dependencies = [ "syn 2.0.43", ] +[[package]] +name = "derive_arbitrary" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.43", +] + [[package]] name = "derive_builder" version = "0.11.2" @@ -4799,6 +4819,17 @@ version = "0.2.151" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "302d7ab3130588088d277783b1e2d2e10c9e9e4a16dd9050e6ec93fb3e7048f4" +[[package]] +name = "libfuzzer-sys" +version = "0.4.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a96cfd5557eb82f2b83fed4955246c988d331975a002961b07c81584d107e7f7" +dependencies = [ + "arbitrary", + "cc", + "once_cell", +] + [[package]] name = "libgit2-sys" version = "0.16.2+1.7.2" @@ -10178,15 +10209,18 @@ checksum = 
"3369f5ac52d5eb6ab48c6b4ffdc8efbcad6b89c765749064ba298f2c68a16a76" name = "tests-fuzz" version = "0.7.0" dependencies = [ + "arbitrary", "async-trait", "common-error", "common-macro", "common-query", + "common-runtime", "common-telemetry", "datatypes", "derive_builder 0.12.0", "dotenv", "lazy_static", + "libfuzzer-sys", "partition", "rand", "rand_chacha", diff --git a/tests-fuzz/Cargo.toml b/tests-fuzz/Cargo.toml index e16e406587c2..5cf789eb5474 100644 --- a/tests-fuzz/Cargo.toml +++ b/tests-fuzz/Cargo.toml @@ -7,15 +7,22 @@ license.workspace = true [lints] workspace = true +[package.metadata] +cargo-fuzz = true + [dependencies] +arbitrary = { version = "1.3.0", features = ["derive"] } async-trait = { workspace = true } common-error = { workspace = true } common-macro = { workspace = true } common-query = { workspace = true } +common-runtime = { workspace = true } common-telemetry = { workspace = true } datatypes = { workspace = true } derive_builder = { workspace = true } +dotenv = "0.15" lazy_static = { workspace = true } +libfuzzer-sys = "0.4" partition = { workspace = true } rand = { workspace = true } rand_chacha = "0.3.1" @@ -24,6 +31,12 @@ serde_json = { workspace = true } snafu = { workspace = true } sql = { workspace = true } sqlparser.workspace = true +sqlx = { version = "0.6", features = [ + "runtime-tokio-rustls", + "mysql", + "postgres", + "chrono", +] } [dev-dependencies] dotenv = "0.15" @@ -34,3 +47,10 @@ sqlx = { version = "0.6", features = [ "chrono", ] } tokio = { workspace = true } + +[[bin]] +name = "fuzz_create_table" +path = "targets/fuzz_create_table.rs" +test = false +bench = false +doc = false diff --git a/tests-fuzz/README.md b/tests-fuzz/README.md new file mode 100644 index 000000000000..c1e2147fb4bd --- /dev/null +++ b/tests-fuzz/README.md @@ -0,0 +1,41 @@ +# Fuzz Test for GreptimeDB + +## Setup +1. Install the [fuzz](https://rust-fuzz.github.io/book/cargo-fuzz/setup.html) cli first. +```bash +cargo install cargo-fuzz +``` + +2. 
Start GreptimeDB +3. Copy the `.env.example`, located at the project root, to `.env` and adjust the values as needed. + +## Run +1. List all fuzz targets +```bash +cargo fuzz list --fuzz-dir tests-fuzz +``` + +2. Run a fuzz target. +```bash +cargo fuzz run fuzz_create_table --fuzz-dir tests-fuzz +``` + +## Crash Reproduction +If you want to reproduce a crash, you first need to obtain the Base64 encoded code, which usually appears at the end of a crash report, and store it in a file. + +Alternatively, if you already have the crash file, you can skip this step. + +```bash +echo "Base64" > .crash +``` +Print the `std::fmt::Debug` output for an input. + +```bash +cargo fuzz fmt fuzz_target .crash --fuzz-dir tests-fuzz +``` +Rerun the fuzz test with the input. + +```bash +cargo fuzz run fuzz_target .crash --fuzz-dir tests-fuzz +``` +For more details, visit [cargo fuzz](https://rust-fuzz.github.io/book/cargo-fuzz/tutorial.html) or run the command `cargo fuzz --help`. diff --git a/tests-fuzz/src/error.rs b/tests-fuzz/src/error.rs index 89fdf127716a..9cf7728b81d2 100644 --- a/tests-fuzz/src/error.rs +++ b/tests-fuzz/src/error.rs @@ -38,4 +38,12 @@ pub enum Error { #[snafu(display("No droppable columns"))] DroppableColumns { location: Location }, + + #[snafu(display("Failed to execute query: {}", sql))] + ExecuteQuery { + sql: String, + #[snafu(source)] + error: sqlx::error::Error, + location: Location, + }, } diff --git a/tests-fuzz/src/generator.rs b/tests-fuzz/src/generator.rs index b2284e7927da..c60720695a8c 100644 --- a/tests-fuzz/src/generator.rs +++ b/tests-fuzz/src/generator.rs @@ -57,7 +57,15 @@ macro_rules! impl_random { ($type: ident, $value:ident, $values: ident) => { impl Random<$type, R> for $value { fn choose(&self, rng: &mut R, amount: usize) -> Vec<$type> { - $values.choose_multiple(rng, amount).cloned().collect() + // Collects the elements in deterministic order first. 
+ let mut result = std::collections::BTreeSet::new(); + while result.len() != amount { + result.insert($values.choose(rng).unwrap().clone()); + } + let mut result = result.into_iter().collect::>(); + // Shuffles the result slice. + result.shuffle(rng); + result } } }; diff --git a/tests-fuzz/src/generator/alter_expr.rs b/tests-fuzz/src/generator/alter_expr.rs index aff122133077..a284107ee152 100644 --- a/tests-fuzz/src/generator/alter_expr.rs +++ b/tests-fuzz/src/generator/alter_expr.rs @@ -155,7 +155,7 @@ mod tests { .generate(&mut rng) .unwrap(); let serialized = serde_json::to_string(&expr).unwrap(); - let expected = r#"{"table_name":"DigNissIMOS","alter_options":{"AddColumn":{"column":{"name":"sit","column_type":{"Boolean":null},"options":["PrimaryKey"]},"location":null}}}"#; + let expected = r#"{"table_name":"animI","alter_options":{"AddColumn":{"column":{"name":"velit","column_type":{"Int32":{}},"options":[{"DefaultValue":{"Int32":853246610}}]},"location":null}}}"#; assert_eq!(expected, serialized); let expr = AlterExprRenameGeneratorBuilder::default() @@ -165,7 +165,8 @@ mod tests { .generate(&mut rng) .unwrap(); let serialized = serde_json::to_string(&expr).unwrap(); - let expected = r#"{"table_name":"DigNissIMOS","alter_options":{"RenameTable":{"new_table_name":"excepturi"}}}"#; + let expected = + r#"{"table_name":"animI","alter_options":{"RenameTable":{"new_table_name":"iure"}}}"#; assert_eq!(expected, serialized); let expr = AlterExprDropColumnGeneratorBuilder::default() @@ -175,8 +176,7 @@ mod tests { .generate(&mut rng) .unwrap(); let serialized = serde_json::to_string(&expr).unwrap(); - let expected = - r#"{"table_name":"DigNissIMOS","alter_options":{"DropColumn":{"name":"INVentORE"}}}"#; + let expected = r#"{"table_name":"animI","alter_options":{"DropColumn":{"name":"toTAm"}}}"#; assert_eq!(expected, serialized); } } diff --git a/tests-fuzz/src/generator/create_expr.rs b/tests-fuzz/src/generator/create_expr.rs index 1ea56f92790e..f473e8706cbc 100644 
--- a/tests-fuzz/src/generator/create_expr.rs +++ b/tests-fuzz/src/generator/create_expr.rs @@ -230,7 +230,7 @@ mod tests { .unwrap(); let serialized = serde_json::to_string(&expr).unwrap(); - let expected = r#"{"table_name":"iN","columns":[{"name":"CUlpa","column_type":{"Int16":{}},"options":["PrimaryKey","NotNull"]},{"name":"dEBiTiS","column_type":{"Timestamp":{"Second":null}},"options":["TimeIndex"]},{"name":"HArum","column_type":{"Int16":{}},"options":["NotNull"]},{"name":"NObIS","column_type":{"Int32":{}},"options":["PrimaryKey"]},{"name":"IMPEDiT","column_type":{"Int16":{}},"options":[{"DefaultValue":{"Int16":-25151}}]},{"name":"bLanDITIis","column_type":{"Boolean":null},"options":[{"DefaultValue":{"Boolean":true}}]},{"name":"Dolores","column_type":{"Float32":{}},"options":["PrimaryKey"]},{"name":"eSt","column_type":{"Float32":{}},"options":[{"DefaultValue":{"Float32":0.9152612}}]},{"name":"INVentORE","column_type":{"Int64":{}},"options":["PrimaryKey"]},{"name":"aDIpiSci","column_type":{"Float64":{}},"options":["Null"]}],"if_not_exists":true,"partition":{"partition_columns":["CUlpa"],"partition_bounds":[{"Value":{"Int16":15966}},{"Value":{"Int16":31925}},"MaxValue"]},"engine":"mito2","options":{},"primary_keys":[6,0,8,3]}"#; + let expected = 
r#"{"table_name":"tEmporIbUS","columns":[{"name":"IMpEdIT","column_type":{"String":null},"options":["PrimaryKey","NotNull"]},{"name":"natuS","column_type":{"Timestamp":{"Nanosecond":null}},"options":["TimeIndex"]},{"name":"ADIPisCI","column_type":{"Int16":{}},"options":[{"DefaultValue":{"Int16":4864}}]},{"name":"EXpEdita","column_type":{"Int64":{}},"options":["PrimaryKey"]},{"name":"cUlpA","column_type":{"Float64":{}},"options":["NotNull"]},{"name":"MOLeStIAs","column_type":{"Boolean":null},"options":["Null"]},{"name":"cUmquE","column_type":{"Float32":{}},"options":[{"DefaultValue":{"Float32":0.21569687}}]},{"name":"toTAm","column_type":{"Float64":{}},"options":["NotNull"]},{"name":"deBitIs","column_type":{"Float32":{}},"options":["Null"]},{"name":"QUi","column_type":{"Int64":{}},"options":["Null"]}],"if_not_exists":true,"partition":{"partition_columns":["IMpEdIT"],"partition_bounds":[{"Value":{"String":"򟘲"}},{"Value":{"String":"򴥫"}},"MaxValue"]},"engine":"mito2","options":{},"primary_keys":[0,3]}"#; assert_eq!(expected, serialized); } } diff --git a/tests-fuzz/src/lib.rs b/tests-fuzz/src/lib.rs index 5c5ba00533b1..2666a35051c1 100644 --- a/tests-fuzz/src/lib.rs +++ b/tests-fuzz/src/lib.rs @@ -21,6 +21,7 @@ pub mod fake; pub mod generator; pub mod ir; pub mod translator; +pub mod utils; #[cfg(test)] pub mod test_utils; diff --git a/tests-fuzz/src/utils.rs b/tests-fuzz/src/utils.rs new file mode 100644 index 000000000000..7c50b0ac66cb --- /dev/null +++ b/tests-fuzz/src/utils.rs @@ -0,0 +1,42 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::env; + +use common_telemetry::info; +use sqlx::mysql::MySqlPoolOptions; +use sqlx::{MySql, Pool}; + +pub struct Connections { + pub mysql: Option>, +} + +const GT_MYSQL_ADDR: &str = "GT_MYSQL_ADDR"; + +pub async fn init_greptime_connections() -> Connections { + let _ = dotenv::dotenv(); + let mysql = if let Ok(addr) = env::var(GT_MYSQL_ADDR) { + Some( + MySqlPoolOptions::new() + .connect(&format!("mysql://{addr}/public")) + .await + .unwrap(), + ) + } else { + info!("GT_MYSQL_ADDR is empty, ignores test"); + None + }; + + Connections { mysql } +} diff --git a/tests-fuzz/targets/fuzz_create_table.rs b/tests-fuzz/targets/fuzz_create_table.rs new file mode 100644 index 000000000000..f3e3cdd7f252 --- /dev/null +++ b/tests-fuzz/targets/fuzz_create_table.rs @@ -0,0 +1,108 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#![no_main] + +use common_telemetry::info; +use libfuzzer_sys::arbitrary::{Arbitrary, Unstructured}; +use libfuzzer_sys::fuzz_target; +use rand::{Rng, SeedableRng}; +use rand_chacha::ChaChaRng; +use snafu::ResultExt; +use sqlx::{MySql, Pool}; +use tests_fuzz::error::{self, Result}; +use tests_fuzz::fake::{ + merge_two_word_map_fn, random_capitalize_map, uppercase_and_keyword_backtick_map, + MappedGenerator, WordGenerator, +}; +use tests_fuzz::generator::create_expr::CreateTableExprGeneratorBuilder; +use tests_fuzz::generator::Generator; +use tests_fuzz::ir::CreateTableExpr; +use tests_fuzz::translator::mysql::create_expr::CreateTableExprTranslator; +use tests_fuzz::translator::DslTranslator; +use tests_fuzz::utils::{init_greptime_connections, Connections}; + +struct FuzzContext { + greptime: Pool, +} + +impl FuzzContext { + async fn close(self) { + self.greptime.close().await; + } +} + +#[derive(Clone, Debug)] +struct FuzzInput { + seed: u64, + columns: usize, +} + +impl Arbitrary<'_> for FuzzInput { + fn arbitrary(u: &mut Unstructured<'_>) -> arbitrary::Result { + let seed = u.int_in_range(u64::MIN..=u64::MAX)?; + let columns = u.int_in_range(2..=10)?; + Ok(FuzzInput { columns, seed }) + } +} + +fn generate_expr(input: FuzzInput) -> Result { + let mut rng = ChaChaRng::seed_from_u64(input.seed); + let create_table_generator = CreateTableExprGeneratorBuilder::default() + .name_generator(Box::new(MappedGenerator::new( + WordGenerator, + merge_two_word_map_fn(random_capitalize_map, uppercase_and_keyword_backtick_map), + ))) + .columns(rng.gen_range(1..input.columns)) + .engine("mito") + .build() + .unwrap(); + create_table_generator.generate(&mut rng) +} + +async fn execute_create_table(ctx: FuzzContext, input: FuzzInput) -> Result<()> { + info!("input: {input:?}"); + let expr = generate_expr(input)?; + let translator = CreateTableExprTranslator; + let sql = translator.translate(&expr)?; + let result = sqlx::query(&sql) + .execute(&ctx.greptime) + .await + 
.context(error::ExecuteQuerySnafu { sql: &sql })?; + info!("Create table: {sql}, result: {result:?}"); + + // Cleans up + let sql = format!("DROP TABLE {}", expr.table_name); + let result = sqlx::query(&sql) + .execute(&ctx.greptime) + .await + .context(error::ExecuteQuerySnafu { sql })?; + info!("Drop table: {}, result: {result:?}", expr.table_name); + ctx.close().await; + + Ok(()) +} + +fuzz_target!(|input: FuzzInput| { + common_telemetry::init_default_ut_logging(); + common_runtime::block_on_write(async { + let Connections { mysql } = init_greptime_connections().await; + let ctx = FuzzContext { + greptime: mysql.expect("mysql connection init must be succeed"), + }; + execute_create_table(ctx, input) + .await + .unwrap_or_else(|err| panic!("fuzz test must be succeed: {err:?}")); + }) +});