diff --git a/.github/workflows/develop.yml b/.github/workflows/develop.yml index aab40c7473f4..fd9077403ec3 100644 --- a/.github/workflows/develop.yml +++ b/.github/workflows/develop.yml @@ -130,7 +130,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - target: [ "fuzz_create_table", "fuzz_alter_table", "fuzz_create_database" ] + target: [ "fuzz_create_table", "fuzz_alter_table", "fuzz_create_database", "fuzz_create_logical_table" ] steps: - uses: actions/checkout@v4 - uses: arduino/setup-protoc@v3 diff --git a/tests-fuzz/Cargo.toml b/tests-fuzz/Cargo.toml index fbf86bcb3977..5b217a0a339a 100644 --- a/tests-fuzz/Cargo.toml +++ b/tests-fuzz/Cargo.toml @@ -50,6 +50,13 @@ test = false bench = false doc = false +[[bin]] +name = "fuzz_create_logical_table" +path = "targets/fuzz_create_logical_table.rs" +test = false +bench = false +doc = false + [[bin]] name = "fuzz_insert" path = "targets/fuzz_insert.rs" diff --git a/tests-fuzz/src/generator/create_expr.rs b/tests-fuzz/src/generator/create_expr.rs index d1d4093de2de..57f8966844ff 100644 --- a/tests-fuzz/src/generator/create_expr.rs +++ b/tests-fuzz/src/generator/create_expr.rs @@ -14,6 +14,7 @@ use std::collections::HashMap; +use datatypes::data_type::ConcreteDataType; use datatypes::value::Value; use derive_builder::Builder; use partition::partition::{PartitionBound, PartitionDef}; @@ -22,14 +23,16 @@ use rand::Rng; use snafu::{ensure, ResultExt}; use super::Generator; +use crate::context::TableContextRef; use crate::error::{self, Error, Result}; use crate::fake::{random_capitalize_map, MappedGenerator, WordGenerator}; use crate::generator::{ColumnOptionGenerator, ConcreteDataTypeGenerator, Random}; -use crate::ir::create_expr::{CreateDatabaseExprBuilder, CreateTableExprBuilder}; +use crate::ir::create_expr::{ColumnOption, CreateDatabaseExprBuilder, CreateTableExprBuilder}; use crate::ir::{ column_options_generator, generate_columns, generate_random_value, - partible_column_options_generator, 
ts_column_options_generator, ColumnTypeGenerator, - CreateDatabaseExpr, CreateTableExpr, Ident, PartibleColumnTypeGenerator, TsColumnTypeGenerator, + partible_column_options_generator, primary_key_options_generator, ts_column_options_generator, + Column, ColumnTypeGenerator, CreateDatabaseExpr, CreateTableExpr, Ident, + PartibleColumnTypeGenerator, StringColumnTypeGenerator, TsColumnTypeGenerator, }; #[derive(Builder)] @@ -200,6 +203,107 @@ impl Generator for CreateTableExprGenerato } } +/// Generate a physical table with 2 columns: ts of TimestampType::Millisecond as time index and val of Float64Type. +#[derive(Builder)] +#[builder(pattern = "owned")] +pub struct CreatePhysicalTableExprGenerator { + #[builder(default = "Box::new(WordGenerator)")] + name_generator: Box>, + #[builder(default = "false")] + if_not_exists: bool, +} + +impl Generator for CreatePhysicalTableExprGenerator { + type Error = Error; + + fn generate(&self, rng: &mut R) -> Result { + Ok(CreateTableExpr { + table_name: self.name_generator.gen(rng), + columns: vec![ + Column { + name: Ident::new("ts"), + column_type: ConcreteDataType::timestamp_millisecond_datatype(), + options: vec![ColumnOption::TimeIndex], + }, + Column { + name: Ident::new("val"), + column_type: ConcreteDataType::float64_datatype(), + options: vec![], + }, + ], + if_not_exists: self.if_not_exists, + partition: None, + engine: "metric".to_string(), + options: [("physical_metric_table".to_string(), "".into())].into(), + primary_keys: vec![], + }) + } +} + +/// Generate a logical table based on an existing physical table. 
+#[derive(Builder)] +#[builder(pattern = "owned")] +pub struct CreateLogicalTableExprGenerator { + physical_table_ctx: TableContextRef, + labels: usize, + if_not_exists: bool, + #[builder(default = "Box::new(WordGenerator)")] + name_generator: Box>, +} + +impl Generator for CreateLogicalTableExprGenerator { + type Error = Error; + + fn generate(&self, rng: &mut R) -> Result { + // Currently we mock the usage of GreptimeDB as Prometheus' backend, the physical table must have two columns. + ensure!( + self.physical_table_ctx.columns.len() == 2, + error::UnexpectedSnafu { + violated: "The physical table must have two columns" + } + ); + + // Generates the logical table columns based on the physical table. + let logical_table_name = self + .physical_table_ctx + .generate_unique_table_name(rng, self.name_generator.as_ref()); + let mut logical_table = CreateTableExpr { + table_name: logical_table_name, + columns: self.physical_table_ctx.columns.clone(), + if_not_exists: self.if_not_exists, + partition: None, + engine: "metric".to_string(), + options: [( + "on_physical_table".to_string(), + self.physical_table_ctx.name.value.clone().into(), + )] + .into(), + primary_keys: vec![], + }; + + let column_names = self.name_generator.choose(rng, self.labels); + logical_table.columns.extend(generate_columns( + rng, + column_names, + &StringColumnTypeGenerator, + Box::new(primary_key_options_generator), + )); + + // Currently only the `primary key` option is kept in physical table, + // so we only keep the `primary key` option in the logical table for fuzz test. 
+ let mut primary_keys = vec![]; + for (idx, column) in logical_table.columns.iter().enumerate() { + if column.is_primary_key() { + primary_keys.push(idx); + } + } + primary_keys.shuffle(rng); + logical_table.primary_keys = primary_keys; + + Ok(logical_table) + } +} + #[derive(Builder)] #[builder(default, pattern = "owned")] pub struct CreateDatabaseExprGenerator { @@ -236,10 +340,14 @@ impl Generator for CreateDatabaseExprGe #[cfg(test)] mod tests { + use std::sync::Arc; + + use datatypes::data_type::ConcreteDataType; use datatypes::value::Value; use rand::SeedableRng; use super::*; + use crate::context::TableContext; #[test] fn test_float64() { @@ -296,6 +404,95 @@ mod tests { assert_eq!(expected, serialized); } + #[test] + fn test_create_logical_table_expr_generator() { + let mut rng = rand::thread_rng(); + + let physical_table_expr = CreatePhysicalTableExprGeneratorBuilder::default() + .if_not_exists(false) + .build() + .unwrap() + .generate(&mut rng) + .unwrap(); + assert_eq!(physical_table_expr.engine, "metric"); + assert_eq!(physical_table_expr.columns.len(), 2); + + let physical_ts = physical_table_expr.columns.iter().position(|column| { + column + .options + .iter() + .any(|option| option == &ColumnOption::TimeIndex) + }); + let physical_ts_name = physical_table_expr.columns[physical_ts.unwrap()] + .name + .value + .to_string(); + + let physical_table_ctx = Arc::new(TableContext::from(&physical_table_expr)); + + let logical_table_expr = CreateLogicalTableExprGeneratorBuilder::default() + .physical_table_ctx(physical_table_ctx) + .labels(5) + .if_not_exists(false) + .build() + .unwrap() + .generate(&mut rng) + .unwrap(); + let logical_ts = logical_table_expr.columns.iter().position(|column| { + column + .options + .iter() + .any(|option| option == &ColumnOption::TimeIndex) + }); + let logical_ts_name = logical_table_expr.columns[logical_ts.unwrap()] + .name + .value + .to_string(); + + assert_eq!(logical_table_expr.engine, "metric"); + 
assert_eq!(logical_table_expr.columns.len(), 7); + assert_eq!(logical_ts_name, physical_ts_name); + assert!(logical_table_expr + .columns + .iter() + .all( + |column| column.column_type != ConcreteDataType::string_datatype() + || column + .options + .iter() + .any(|option| option == &ColumnOption::PrimaryKey) + )); + } + + #[test] + fn test_create_logical_table_expr_generator_deterministic() { + let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0); + let physical_table_expr = CreatePhysicalTableExprGeneratorBuilder::default() + .if_not_exists(false) + .build() + .unwrap() + .generate(&mut rng) + .unwrap(); + let physical_table_serialized = serde_json::to_string(&physical_table_expr).unwrap(); + let physical_table_expected = r#"{"table_name":{"value":"expedita","quote_style":null},"columns":[{"name":{"value":"ts","quote_style":null},"column_type":{"Timestamp":{"Millisecond":null}},"options":["TimeIndex"]},{"name":{"value":"val","quote_style":null},"column_type":{"Float64":{}},"options":[]}],"if_not_exists":false,"partition":null,"engine":"metric","options":{"physical_metric_table":{"String":""}},"primary_keys":[]}"#; + assert_eq!(physical_table_expected, physical_table_serialized); + + let physical_table_ctx = Arc::new(TableContext::from(&physical_table_expr)); + + let logical_table_expr = CreateLogicalTableExprGeneratorBuilder::default() + .physical_table_ctx(physical_table_ctx) + .labels(5) + .if_not_exists(false) + .build() + .unwrap() + .generate(&mut rng) + .unwrap(); + + let logical_table_serialized = serde_json::to_string(&logical_table_expr).unwrap(); + let logical_table_expected = 
r#"{"table_name":{"value":"impedit","quote_style":null},"columns":[{"name":{"value":"ts","quote_style":null},"column_type":{"Timestamp":{"Millisecond":null}},"options":["TimeIndex"]},{"name":{"value":"val","quote_style":null},"column_type":{"Float64":{}},"options":[]},{"name":{"value":"qui","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"totam","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"molestias","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"natus","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"cumque","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]}],"if_not_exists":false,"partition":null,"engine":"metric","options":{"on_physical_table":{"String":"expedita"}},"primary_keys":[2,5,3,6,4]}"#; + assert_eq!(logical_table_expected, logical_table_serialized); + } + #[test] fn test_create_database_expr_generator() { let mut rng = rand::thread_rng(); diff --git a/tests-fuzz/src/ir.rs b/tests-fuzz/src/ir.rs index 50ae1d216f68..eb6ee105b2c8 100644 --- a/tests-fuzz/src/ir.rs +++ b/tests-fuzz/src/ir.rs @@ -63,6 +63,8 @@ lazy_static! { ConcreteDataType::date_datatype(), ConcreteDataType::datetime_datatype(), ]; + pub static ref STRING_DATA_TYPES: Vec = + vec![ConcreteDataType::string_datatype()]; } impl_random!(ConcreteDataType, ColumnTypeGenerator, DATA_TYPES); @@ -72,10 +74,16 @@ impl_random!( PartibleColumnTypeGenerator, PARTIBLE_DATA_TYPES ); +impl_random!( + ConcreteDataType, + StringColumnTypeGenerator, + STRING_DATA_TYPES +); pub struct ColumnTypeGenerator; pub struct TsColumnTypeGenerator; pub struct PartibleColumnTypeGenerator; +pub struct StringColumnTypeGenerator; /// Generates a random [Value]. 
pub fn generate_random_value( @@ -318,6 +326,20 @@ pub fn ts_column_options_generator( vec![ColumnOption::TimeIndex] } +pub fn primary_key_and_not_null_column_options_generator( + _: &mut R, + _: &ConcreteDataType, +) -> Vec { + vec![ColumnOption::PrimaryKey, ColumnOption::NotNull] +} + +pub fn primary_key_options_generator( + _: &mut R, + _: &ConcreteDataType, +) -> Vec { + vec![ColumnOption::PrimaryKey] +} + /// Generates columns with given `names`. pub fn generate_columns( rng: &mut R, diff --git a/tests-fuzz/targets/fuzz_create_logical_table.rs b/tests-fuzz/targets/fuzz_create_logical_table.rs new file mode 100644 index 000000000000..e66ea4518966 --- /dev/null +++ b/tests-fuzz/targets/fuzz_create_logical_table.rs @@ -0,0 +1,195 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#![no_main]
+
+//! Fuzz target that creates a physical table and a logical table on top of it.
+//!
+//! Each run creates both tables, compares the columns reported for them with the columns
+//! of the generated expressions, and finally drops both tables.
+
+use std::sync::Arc;
+
+use common_telemetry::info;
+use datatypes::data_type::ConcreteDataType;
+use libfuzzer_sys::arbitrary::{Arbitrary, Unstructured};
+use libfuzzer_sys::fuzz_target;
+use rand::{Rng, SeedableRng};
+use rand_chacha::ChaChaRng;
+use snafu::ResultExt;
+use sqlx::{MySql, Pool};
+use tests_fuzz::context::TableContext;
+use tests_fuzz::error::{self, Result};
+use tests_fuzz::fake::{
+    merge_two_word_map_fn, random_capitalize_map, uppercase_and_keyword_backtick_map,
+    MappedGenerator, WordGenerator,
+};
+use tests_fuzz::generator::create_expr::{
+    CreateLogicalTableExprGeneratorBuilder, CreatePhysicalTableExprGeneratorBuilder,
+};
+use tests_fuzz::generator::Generator;
+use tests_fuzz::ir::{primary_key_and_not_null_column_options_generator, Column};
+use tests_fuzz::translator::mysql::create_expr::CreateTableExprTranslator;
+use tests_fuzz::translator::DslTranslator;
+use tests_fuzz::utils::{init_greptime_connections, Connections};
+use tests_fuzz::validator;
+
+/// Connections used by a single fuzz run.
+struct FuzzContext {
+    /// MySQL connection pool to the GreptimeDB instance under test.
+    greptime: Pool<MySql>,
+}
+
+impl FuzzContext {
+    /// Closes the connection pool, consuming the context.
+    async fn close(self) {
+        self.greptime.close().await;
+    }
+}
+
+/// Fuzzer-provided input: the seed of the random number generator used by the run.
+#[derive(Clone, Debug)]
+struct FuzzInput {
+    seed: u64,
+}
+
+impl Arbitrary<'_> for FuzzInput {
+    /// Builds a [`FuzzInput`] by drawing a seed from the full `u64` range.
+    fn arbitrary(u: &mut Unstructured<'_>) -> arbitrary::Result<Self> {
+        let seed = u.int_in_range(u64::MIN..=u64::MAX)?;
+        Ok(FuzzInput { seed })
+    }
+}
+
+/// Runs one create-logical-table scenario against `ctx`, driven by `input.seed`.
+///
+/// Steps: create a physical table, create a logical table that references it, compare
+/// the columns reported for each table with the expected ones, drop both tables and
+/// close the connection pool.
+///
+/// # Errors
+///
+/// Returns an error if expression generation, SQL translation, query execution or column
+/// validation fails. On such an early return the tables created so far are not dropped
+/// and the pool is not explicitly closed.
+async fn execute_create_logic_table(ctx: FuzzContext, input: FuzzInput) -> Result<()> {
+    info!("input: {input:?}");
+    // All random choices below are drawn from an RNG seeded with the fuzzer-provided seed.
+    let mut rng = ChaChaRng::seed_from_u64(input.seed);
+
+    // Create physical table
+    let physical_table_if_not_exists = rng.gen_bool(0.5);
+    let create_physical_table_expr = CreatePhysicalTableExprGeneratorBuilder::default()
+        .name_generator(Box::new(MappedGenerator::new(
+            WordGenerator,
+            merge_two_word_map_fn(random_capitalize_map, uppercase_and_keyword_backtick_map),
+        )))
+        .if_not_exists(physical_table_if_not_exists)
+        .build()
+        .unwrap()
+        .generate(&mut rng)?;
+    let translator = CreateTableExprTranslator;
+    let sql = translator.translate(&create_physical_table_expr)?;
+    let result = sqlx::query(&sql)
+        .execute(&ctx.greptime)
+        .await
+        .context(error::ExecuteQuerySnafu { sql: &sql })?;
+    info!("Create physical table: {sql}, result: {result:?}");
+
+    // Expected columns of the physical table: the declared ones plus `__tsid` (uint64) and
+    // `__table_id` (uint32), both expected as primary key and not null.
+    // NOTE(review): presumably these two columns are added by the engine itself - confirm.
+    let mut physical_table_columns = create_physical_table_expr.columns.clone();
+    physical_table_columns.push({
+        let column_type = ConcreteDataType::uint64_datatype();
+        let options = primary_key_and_not_null_column_options_generator(&mut rng, &column_type);
+        Column {
+            name: "__tsid".into(),
+            column_type,
+            options,
+        }
+    });
+    physical_table_columns.push({
+        let column_type = ConcreteDataType::uint32_datatype();
+        let options = primary_key_and_not_null_column_options_generator(&mut rng, &column_type);
+        Column {
+            name: "__table_id".into(),
+            column_type,
+            options,
+        }
+    });
+
+    // Create logical table
+    let physical_table_ctx = Arc::new(TableContext::from(&create_physical_table_expr));
+    let labels = rng.gen_range(1..=5);
+    let logical_table_if_not_exists = rng.gen_bool(0.5);
+
+    let create_logical_table_expr = CreateLogicalTableExprGeneratorBuilder::default()
+        .name_generator(Box::new(MappedGenerator::new(
+            WordGenerator,
+            merge_two_word_map_fn(random_capitalize_map, uppercase_and_keyword_backtick_map),
+        )))
+        .physical_table_ctx(physical_table_ctx)
+        .labels(labels)
+        .if_not_exists(logical_table_if_not_exists)
+        .build()
+        .unwrap()
+        .generate(&mut rng)?;
+    let sql = translator.translate(&create_logical_table_expr)?;
+    let result = sqlx::query(&sql)
+        .execute(&ctx.greptime)
+        .await
+        .context(error::ExecuteQuerySnafu { sql: &sql })?;
+    info!("Create logical table: {sql}, result: {result:?}");
+
+    // Validate columns in logical table
+    let mut column_entries = validator::column::fetch_columns(
+        &ctx.greptime,
+        "public".into(),
+        create_logical_table_expr.table_name.clone(),
+    )
+    .await?;
+    // Both sides are sorted by column name so they can be compared pairwise.
+    column_entries.sort_by(|a, b| a.column_name.cmp(&b.column_name));
+    let mut columns = create_logical_table_expr.columns.clone();
+    columns.sort_by(|a, b| a.name.value.cmp(&b.name.value));
+    validator::column::assert_eq(&column_entries, &columns)?;
+
+    // Validate columns in physical table
+    // Only the string-typed columns of the logical table are expected to show up in the
+    // physical table as well.
+    columns.retain(|column| column.column_type == ConcreteDataType::string_datatype());
+    physical_table_columns.append(&mut columns);
+    physical_table_columns.sort_by(|a, b| a.name.value.cmp(&b.name.value));
+
+    let mut column_entries = validator::column::fetch_columns(
+        &ctx.greptime,
+        "public".into(),
+        create_physical_table_expr.table_name.clone(),
+    )
+    .await?;
+    column_entries.sort_by(|a, b| a.column_name.cmp(&b.column_name));
+    validator::column::assert_eq(&column_entries, &physical_table_columns)?;
+
+    // Clean up logical table
+    let sql = format!("DROP TABLE {}", create_logical_table_expr.table_name);
+    let result = sqlx::query(&sql)
+        .execute(&ctx.greptime)
+        .await
+        .context(error::ExecuteQuerySnafu { sql: &sql })?;
+    info!(
+        "Drop table: {}, result: {result:?}",
+        create_logical_table_expr.table_name
+    );
+
+    // Clean up physical table
+    let sql = format!("DROP TABLE {}", create_physical_table_expr.table_name);
+    let result = sqlx::query(&sql)
+        .execute(&ctx.greptime)
+        .await
+        .context(error::ExecuteQuerySnafu { sql: &sql })?;
+    info!(
+        "Drop table: {}, result: {result:?}",
+        create_physical_table_expr.table_name
+    );
+
+    ctx.close().await;
+
+    Ok(())
+}
+
+// Fuzzer entry point: connects to GreptimeDB, runs one scenario and panics on any error
+// so that the failure is reported for the current input.
+fuzz_target!(|input: FuzzInput| {
+    common_telemetry::init_default_ut_logging();
+    common_runtime::block_on_write(async {
+        let Connections { mysql } = init_greptime_connections().await;
+        let ctx = FuzzContext {
+            greptime: mysql.expect("mysql connection init must be succeed"),
+        };
+        execute_create_logic_table(ctx, input)
+            .await
+            .unwrap_or_else(|err| panic!("fuzz test must be succeed: {err:?}"));
+    })
+});