From cfae276d37a65d14250185cd3d376a305078fdbd Mon Sep 17 00:00:00 2001 From: Yohan Wal <59358312+CookiePieWw@users.noreply.github.com> Date: Wed, 15 May 2024 15:05:51 +0800 Subject: [PATCH] feat(fuzz): add validator for inserted rows (#3932) * feat(fuzz): add validator for inserted rows * fix: compatibility with mysql types * feat(fuzz): add datetime and date type in mysql for row validator --- Cargo.lock | 1 + tests-fuzz/Cargo.toml | 1 + tests-fuzz/src/generator.rs | 6 +- tests-fuzz/src/generator/insert_expr.rs | 13 +- tests-fuzz/src/ir.rs | 76 ++++++++ tests-fuzz/src/ir/insert_expr.rs | 48 +++++- .../src/translator/mysql/insert_expr.rs | 2 +- tests-fuzz/src/validator.rs | 1 + tests-fuzz/src/validator/row.rs | 162 ++++++++++++++++++ tests-fuzz/targets/fuzz_insert.rs | 39 ++++- .../targets/fuzz_insert_logical_table.rs | 38 +++- 11 files changed, 373 insertions(+), 14 deletions(-) create mode 100644 tests-fuzz/src/validator/row.rs diff --git a/Cargo.lock b/Cargo.lock index bee46fe01b3e..e2b79e083557 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -10608,6 +10608,7 @@ version = "0.7.2" dependencies = [ "arbitrary", "async-trait", + "chrono", "common-error", "common-macro", "common-query", diff --git a/tests-fuzz/Cargo.toml b/tests-fuzz/Cargo.toml index c7e733448988..22a44e5cb094 100644 --- a/tests-fuzz/Cargo.toml +++ b/tests-fuzz/Cargo.toml @@ -17,6 +17,7 @@ unstable = ["nix"] [dependencies] arbitrary = { version = "1.3.0", features = ["derive"] } async-trait = { workspace = true } +chrono = { workspace = true } common-error = { workspace = true } common-macro = { workspace = true } common-query = { workspace = true } diff --git a/tests-fuzz/src/generator.rs b/tests-fuzz/src/generator.rs index 2f9de0770c98..f4da5ae2353a 100644 --- a/tests-fuzz/src/generator.rs +++ b/tests-fuzz/src/generator.rs @@ -20,11 +20,12 @@ pub mod select_expr; use std::fmt; use datatypes::data_type::ConcreteDataType; +use datatypes::value::Value; use rand::Rng; use crate::error::Error; use crate::ir::create_expr::ColumnOption; -use crate::ir::{AlterTableExpr, CreateTableExpr}; +use crate::ir::{AlterTableExpr, CreateTableExpr, Ident}; pub type CreateTableExprGenerator = Box + Sync + Send>; @@ -36,6 +37,9 @@ pub type ColumnOptionGenerator = Box Vec pub type ConcreteDataTypeGenerator = Box>; +pub type ValueGenerator = + Box>) -> Value>; + pub trait Generator { type Error: Sync + Send + fmt::Debug; diff --git a/tests-fuzz/src/generator/insert_expr.rs b/tests-fuzz/src/generator/insert_expr.rs index 5af3289c0436..2549e6bdd01a 100644 --- a/tests-fuzz/src/generator/insert_expr.rs +++ b/tests-fuzz/src/generator/insert_expr.rs @@ -22,7 +22,7 @@ use rand::Rng; use crate::context::TableContextRef; use crate::error::{Error, Result}; use crate::fake::WordGenerator; -use crate::generator::{Generator, Random}; +use crate::generator::{Generator, Random, ValueGenerator}; use crate::ir::insert_expr::{InsertIntoExpr, RowValue}; use crate::ir::{generate_random_value, Ident}; @@ -37,6 +37,8 @@ pub struct InsertExprGenerator { rows: usize, #[builder(default = "Box::new(WordGenerator)")] word_generator: Box>, + #[builder(default = "Box::new(generate_random_value)")] + value_generator: ValueGenerator, #[builder(default)] _phantom: PhantomData, } @@ -81,7 +83,7 @@ impl Generator for InsertExprGenerator { continue; } - row.push(RowValue::Value(generate_random_value( + row.push(RowValue::Value((self.value_generator)( rng, &column.column_type, Some(self.word_generator.as_ref()), @@ -93,11 +95,8 @@ impl Generator for InsertExprGenerator { Ok(InsertIntoExpr { table_name: self.table_ctx.name.to_string(), - columns: if self.omit_column_list { - vec![] - } else { - values_columns - }, + omit_column_list: self.omit_column_list, + columns: values_columns, values_list, }) } diff --git a/tests-fuzz/src/ir.rs b/tests-fuzz/src/ir.rs index eb6ee105b2c8..39e9322e4c74 100644 --- a/tests-fuzz/src/ir.rs +++ b/tests-fuzz/src/ir.rs @@ -65,10 +65,21 @@ lazy_static! { ]; pub static ref STRING_DATA_TYPES: Vec = vec![ConcreteDataType::string_datatype()]; + pub static ref MYSQL_TS_DATA_TYPES: Vec = vec![ + // MySQL only permits fractional seconds with up to microseconds (6 digits) precision. + ConcreteDataType::timestamp_microsecond_datatype(), + ConcreteDataType::timestamp_millisecond_datatype(), + ConcreteDataType::timestamp_second_datatype(), + ]; } impl_random!(ConcreteDataType, ColumnTypeGenerator, DATA_TYPES); impl_random!(ConcreteDataType, TsColumnTypeGenerator, TS_DATA_TYPES); +impl_random!( + ConcreteDataType, + MySQLTsColumnTypeGenerator, + MYSQL_TS_DATA_TYPES +); impl_random!( ConcreteDataType, PartibleColumnTypeGenerator, @@ -82,6 +93,7 @@ impl_random!( pub struct ColumnTypeGenerator; pub struct TsColumnTypeGenerator; +pub struct MySQLTsColumnTypeGenerator; pub struct PartibleColumnTypeGenerator; pub struct StringColumnTypeGenerator; @@ -110,6 +122,31 @@ pub fn generate_random_value( } } +/// Generates a random [Value] for MySQL. +pub fn generate_random_value_for_mysql( + rng: &mut R, + datatype: &ConcreteDataType, + random_str: Option<&dyn Random>, +) -> Value { + match datatype { + &ConcreteDataType::Boolean(_) => Value::from(rng.gen::()), + ConcreteDataType::Int16(_) => Value::from(rng.gen::()), + ConcreteDataType::Int32(_) => Value::from(rng.gen::()), + ConcreteDataType::Int64(_) => Value::from(rng.gen::()), + ConcreteDataType::Float32(_) => Value::from(rng.gen::()), + ConcreteDataType::Float64(_) => Value::from(rng.gen::()), + ConcreteDataType::String(_) => match random_str { + Some(random) => Value::from(random.gen(rng).value), + None => Value::from(rng.gen::().to_string()), + }, + ConcreteDataType::Date(_) => generate_random_date(rng), + ConcreteDataType::DateTime(_) => generate_random_datetime(rng), + &ConcreteDataType::Timestamp(ts_type) => generate_random_timestamp_for_mysql(rng, ts_type), + + _ => unimplemented!("unsupported type: {datatype}"), + } +} + fn generate_random_timestamp(rng: &mut R, ts_type: TimestampType) -> Value { let v = match ts_type { TimestampType::Second(_) => { @@ -140,6 +177,37 @@ fn generate_random_timestamp(rng: &mut R, ts_type: TimestampType) -> Val Value::from(v) } +// MySQL supports timestamp from '1970-01-01 00:00:01.000000' to '2038-01-19 03:14:07.499999' +fn generate_random_timestamp_for_mysql(rng: &mut R, ts_type: TimestampType) -> Value { + let v = match ts_type { + TimestampType::Second(_) => { + let min = 1; + let max = 2_147_483_647; + let value = rng.gen_range(min..=max); + Timestamp::new_second(value) + } + TimestampType::Millisecond(_) => { + let min = 1000; + let max = 2_147_483_647_499; + let value = rng.gen_range(min..=max); + Timestamp::new_millisecond(value) + } + TimestampType::Microsecond(_) => { + let min = 1_000_000; + let max = 2_147_483_647_499_999; + let value = rng.gen_range(min..=max); + Timestamp::new_microsecond(value) + } + TimestampType::Nanosecond(_) => { + let min = 1_000_000_000; + let max = 2_147_483_647_499_999_000; + let value = rng.gen_range(min..=max); + Timestamp::new_nanosecond(value) + } + }; + Value::from(v) +} + fn generate_random_datetime(rng: &mut R) -> Value { let min = i64::from(Timestamp::MIN_MILLISECOND); let max = i64::from(Timestamp::MAX_MILLISECOND); @@ -258,6 +326,14 @@ impl Column { ) }) } + + // Returns default value if it has. + pub fn default_value(&self) -> Option<&Value> { + self.options.iter().find_map(|opt| match opt { + ColumnOption::DefaultValue(value) => Some(value), + _ => None, + }) + } } /// Returns droppable columns. i.e., non-primary key columns, non-ts columns. diff --git a/tests-fuzz/src/ir/insert_expr.rs b/tests-fuzz/src/ir/insert_expr.rs index c7476c517031..1b1c19537675 100644 --- a/tests-fuzz/src/ir/insert_expr.rs +++ b/tests-fuzz/src/ir/insert_expr.rs @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -use std::fmt::Display; +use std::fmt::{Debug, Display}; use datatypes::value::Value; @@ -20,17 +20,28 @@ use crate::ir::Column; pub struct InsertIntoExpr { pub table_name: String, + pub omit_column_list: bool, pub columns: Vec, pub values_list: Vec, } pub type RowValues = Vec; +#[derive(PartialEq, PartialOrd, Clone)] pub enum RowValue { Value(Value), Default, } +impl RowValue { + pub fn cmp(&self, other: &Self) -> Option { + match (self, other) { + (RowValue::Value(v1), RowValue::Value(v2)) => v1.partial_cmp(v2), + _ => panic!("Invalid comparison: {:?} and {:?}", self, other), + } + } +} + impl Display for RowValue { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { @@ -46,3 +57,38 @@ impl Display for RowValue { } } } + +impl Debug for RowValue { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + RowValue::Value(v) => match v { + Value::Null => write!(f, "NULL"), + v @ (Value::String(_) + | Value::Timestamp(_) + | Value::DateTime(_) + | Value::Date(_)) => write!(f, "'{}'", v), + v => write!(f, "{}", v), + }, + RowValue::Default => write!(f, "DEFAULT"), + } + } +} + +#[cfg(test)] +mod tests { + use common_time::Timestamp; + use datatypes::value::Value; + + use crate::ir::insert_expr::RowValue; + + #[test] + fn test_value_cmp() { + let time_stampe1 = + Value::Timestamp(Timestamp::from_str_utc("-39988-01-31 01:21:12.848697+0000").unwrap()); + let time_stampe2 = + Value::Timestamp(Timestamp::from_str_utc("+12970-09-22 08:40:58.392839+0000").unwrap()); + let v1 = RowValue::Value(time_stampe1); + let v2 = RowValue::Value(time_stampe2); + assert_eq!(v1.cmp(&v2), Some(std::cmp::Ordering::Less)); + } +} diff --git a/tests-fuzz/src/translator/mysql/insert_expr.rs b/tests-fuzz/src/translator/mysql/insert_expr.rs index 0e2252cbc54a..61340fdcbf06 100644 --- a/tests-fuzz/src/translator/mysql/insert_expr.rs +++ b/tests-fuzz/src/translator/mysql/insert_expr.rs @@ -33,7 +33,7 @@ impl DslTranslator for InsertIntoExprTranslator { impl InsertIntoExprTranslator { fn format_columns(input: &InsertIntoExpr) -> String { - if input.columns.is_empty() { + if input.omit_column_list { "".to_string() } else { let list = input diff --git a/tests-fuzz/src/validator.rs b/tests-fuzz/src/validator.rs index 198d009a152b..cf2df9af229c 100644 --- a/tests-fuzz/src/validator.rs +++ b/tests-fuzz/src/validator.rs @@ -13,3 +13,4 @@ // limitations under the License. pub mod column; +pub mod row; diff --git a/tests-fuzz/src/validator/row.rs b/tests-fuzz/src/validator/row.rs new file mode 100644 index 000000000000..b17ea1dd6725 --- /dev/null +++ b/tests-fuzz/src/validator/row.rs @@ -0,0 +1,162 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use chrono::{DateTime as ChronoDateTime, NaiveDate, NaiveDateTime, Utc}; +use common_time::date::Date; +use common_time::{DateTime, Timestamp}; +use datatypes::value::Value; +use snafu::{ensure, ResultExt}; +use sqlx::database::HasArguments; +use sqlx::{ + Column, ColumnIndex, Database, Decode, Encode, Executor, IntoArguments, Row, Type, TypeInfo, + ValueRef, +}; + +use crate::error::{self, Result}; +use crate::ir::insert_expr::{RowValue, RowValues}; + +/// Asserts fetched_rows are equal to rows +pub fn assert_eq<'a, DB>( + columns: &[crate::ir::Column], + fetched_rows: &'a [::Row], + rows: &[RowValues], +) -> Result<()> +where + DB: Database, + usize: ColumnIndex<::Row>, + bool: sqlx::Type + sqlx::Decode<'a, DB>, + i8: sqlx::Type + sqlx::Decode<'a, DB>, + i16: sqlx::Type + sqlx::Decode<'a, DB>, + i32: sqlx::Type + sqlx::Decode<'a, DB>, + i64: sqlx::Type + sqlx::Decode<'a, DB>, + f32: sqlx::Type + sqlx::Decode<'a, DB>, + f64: sqlx::Type + sqlx::Decode<'a, DB>, + String: sqlx::Type + sqlx::Decode<'a, DB>, + Vec: sqlx::Type + sqlx::Decode<'a, DB>, + ChronoDateTime: sqlx::Type + sqlx::Decode<'a, DB>, + NaiveDateTime: sqlx::Type + sqlx::Decode<'a, DB>, + NaiveDate: sqlx::Type + sqlx::Decode<'a, DB>, +{ + ensure!( + fetched_rows.len() == rows.len(), + error::AssertSnafu { + reason: format!( + "Expected values length: {}, got: {}", + rows.len(), + fetched_rows.len(), + ) + } + ); + + for (idx, fetched_row) in fetched_rows.iter().enumerate() { + let row = &rows[idx]; + + ensure!( + fetched_row.len() == row.len(), + error::AssertSnafu { + reason: format!( + "Expected row length: {}, got: {}", + row.len(), + fetched_row.len(), + ) + } + ); + + for (idx, value) in row.iter().enumerate() { + let fetched_value = if fetched_row.try_get_raw(idx).unwrap().is_null() { + RowValue::Value(Value::Null) + } else { + let value_type = fetched_row.column(idx).type_info().name(); + match value_type { + "BOOL" | "BOOLEAN" => RowValue::Value(Value::Boolean( + fetched_row.try_get::(idx).unwrap(), + )), + "TINYINT" => { + RowValue::Value(Value::Int8(fetched_row.try_get::(idx).unwrap())) + } + "SMALLINT" => RowValue::Value(Value::Int16( + fetched_row.try_get::(idx).unwrap(), + )), + "INT" => RowValue::Value(Value::Int32( + fetched_row.try_get::(idx).unwrap(), + )), + "BIGINT" => RowValue::Value(Value::Int64( + fetched_row.try_get::(idx).unwrap(), + )), + "FLOAT" => RowValue::Value(Value::Float32(datatypes::value::OrderedFloat( + fetched_row.try_get::(idx).unwrap(), + ))), + "DOUBLE" => RowValue::Value(Value::Float64(datatypes::value::OrderedFloat( + fetched_row.try_get::(idx).unwrap(), + ))), + "VARCHAR" | "CHAR" | "TEXT" => RowValue::Value(Value::String( + fetched_row.try_get::(idx).unwrap().into(), + )), + "VARBINARY" | "BINARY" | "BLOB" => RowValue::Value(Value::Binary( + fetched_row.try_get::, usize>(idx).unwrap().into(), + )), + "TIMESTAMP" => RowValue::Value(Value::Timestamp( + Timestamp::from_chrono_datetime( + fetched_row + .try_get::, usize>(idx) + .unwrap() + .naive_utc(), + ) + .unwrap(), + )), + "DATETIME" => RowValue::Value(Value::DateTime(DateTime::from( + fetched_row.try_get::(idx).unwrap(), + ))), + "DATE" => RowValue::Value(Value::Date(Date::from( + fetched_row.try_get::(idx).unwrap(), + ))), + _ => panic!("Unsupported type: {}", value_type), + } + }; + + let value = match value { + // In MySQL, boolean is stored as TINYINT(1) + RowValue::Value(Value::Boolean(v)) => RowValue::Value(Value::Int8(*v as i8)), + RowValue::Default => match columns[idx].default_value().unwrap().clone() { + Value::Boolean(v) => RowValue::Value(Value::Int8(v as i8)), + default_value => RowValue::Value(default_value), + }, + _ => value.clone(), + }; + ensure!( + value == fetched_value, + error::AssertSnafu { + reason: format!("Expected value: {:?}, got: {:?}", value, fetched_value) + } + ) + } + } + + Ok(()) +} + +/// Returns all [RowEntry] of the `table_name`. +pub async fn fetch_values<'a, DB, E>(e: E, sql: &'a str) -> Result::Row>> +where + DB: Database, + >::Arguments: IntoArguments<'a, DB>, + for<'c> E: 'a + Executor<'c, Database = DB>, + for<'c> String: Decode<'c, DB> + Type, + for<'c> String: Encode<'c, DB> + Type, +{ + sqlx::query(sql) + .fetch_all(e) + .await + .context(error::ExecuteQuerySnafu { sql }) +} diff --git a/tests-fuzz/targets/fuzz_insert.rs b/tests-fuzz/targets/fuzz_insert.rs index 3f133b289424..eab40cb7ec9a 100644 --- a/tests-fuzz/targets/fuzz_insert.rs +++ b/tests-fuzz/targets/fuzz_insert.rs @@ -32,11 +32,14 @@ use tests_fuzz::fake::{ use tests_fuzz::generator::create_expr::CreateTableExprGeneratorBuilder; use tests_fuzz::generator::insert_expr::InsertExprGeneratorBuilder; use tests_fuzz::generator::Generator; -use tests_fuzz::ir::{CreateTableExpr, InsertIntoExpr}; +use tests_fuzz::ir::{ + generate_random_value_for_mysql, CreateTableExpr, InsertIntoExpr, MySQLTsColumnTypeGenerator, +}; use tests_fuzz::translator::mysql::create_expr::CreateTableExprTranslator; use tests_fuzz::translator::mysql::insert_expr::InsertIntoExprTranslator; use tests_fuzz::translator::DslTranslator; use tests_fuzz::utils::{init_greptime_connections_via_env, Connections}; +use tests_fuzz::validator; struct FuzzContext { greptime: Pool, @@ -80,6 +83,7 @@ fn generate_create_expr( ))) .columns(input.columns) .engine("mito") + .ts_column_type_generator(Box::new(MySQLTsColumnTypeGenerator)) .build() .unwrap(); create_table_generator.generate(rng) @@ -96,6 +100,7 @@ fn generate_insert_expr( .table_ctx(table_ctx) .omit_column_list(omit_column_list) .rows(input.rows) + .value_generator(Box::new(generate_random_value_for_mysql)) .build() .unwrap(); insert_generator.generate(rng) @@ -135,7 +140,37 @@ async fn execute_insert(ctx: FuzzContext, input: FuzzInput) -> Result<()> { } ); - // TODO: Validate inserted rows + // Validate inserted rows + let ts_column_idx = create_expr + .columns + .iter() + .position(|c| c.is_time_index()) + .unwrap(); + let ts_column_name = create_expr.columns[ts_column_idx].name.clone(); + let ts_column_idx_in_insert = insert_expr + .columns + .iter() + .position(|c| c.name == ts_column_name) + .unwrap(); + let column_list = insert_expr + .columns + .iter() + .map(|c| c.name.to_string()) + .collect::>() + .join(", ") + .to_string(); + let select_sql = format!( + "SELECT {} FROM {} ORDER BY {}", + column_list, create_expr.table_name, ts_column_name + ); + let fetched_rows = validator::row::fetch_values(&ctx.greptime, select_sql.as_str()).await?; + let mut expected_rows = insert_expr.values_list; + expected_rows.sort_by(|a, b| { + a[ts_column_idx_in_insert] + .cmp(&b[ts_column_idx_in_insert]) + .unwrap() + }); + validator::row::assert_eq::(&insert_expr.columns, &fetched_rows, &expected_rows)?; // Cleans up let sql = format!("DROP TABLE {}", create_expr.table_name); diff --git a/tests-fuzz/targets/fuzz_insert_logical_table.rs b/tests-fuzz/targets/fuzz_insert_logical_table.rs index 47f53386a859..fc8b2f9bd775 100644 --- a/tests-fuzz/targets/fuzz_insert_logical_table.rs +++ b/tests-fuzz/targets/fuzz_insert_logical_table.rs @@ -34,11 +34,12 @@ use tests_fuzz::generator::create_expr::{ }; use tests_fuzz::generator::insert_expr::InsertExprGeneratorBuilder; use tests_fuzz::generator::Generator; -use tests_fuzz::ir::{CreateTableExpr, InsertIntoExpr}; +use tests_fuzz::ir::{generate_random_value_for_mysql, CreateTableExpr, InsertIntoExpr}; use tests_fuzz::translator::mysql::create_expr::CreateTableExprTranslator; use tests_fuzz::translator::mysql::insert_expr::InsertIntoExprTranslator; use tests_fuzz::translator::DslTranslator; use tests_fuzz::utils::{init_greptime_connections_via_env, Connections}; +use tests_fuzz::validator; struct FuzzContext { greptime: Pool, @@ -107,6 +108,7 @@ fn generate_insert_expr( .omit_column_list(false) .table_ctx(table_ctx) .rows(input.rows) + .value_generator(Box::new(generate_random_value_for_mysql)) .build() .unwrap(); insert_generator.generate(rng) @@ -160,7 +162,39 @@ async fn execute_insert(ctx: FuzzContext, input: FuzzInput) -> Result<()> { } ); - // TODO: Validate inserted rows + // Validate inserted rows + let ts_column_idx = create_logical_table_expr + .columns + .iter() + .position(|c| c.is_time_index()) + .unwrap(); + let ts_column_name = create_logical_table_expr.columns[ts_column_idx] + .name + .clone(); + let ts_column_idx_in_insert = insert_expr + .columns + .iter() + .position(|c| c.name == ts_column_name) + .unwrap(); + let column_list = insert_expr + .columns + .iter() + .map(|c| c.name.to_string()) + .collect::>() + .join(", ") + .to_string(); + let select_sql = format!( + "SELECT {} FROM {} ORDER BY {}", + column_list, create_logical_table_expr.table_name, ts_column_name + ); + let fetched_rows = validator::row::fetch_values(&ctx.greptime, select_sql.as_str()).await?; + let mut expected_rows = insert_expr.values_list; + expected_rows.sort_by(|a, b| { + a[ts_column_idx_in_insert] + .cmp(&b[ts_column_idx_in_insert]) + .unwrap() + }); + validator::row::assert_eq::(&insert_expr.columns, &fetched_rows, &expected_rows)?; // Clean up logical table let sql = format!("DROP TABLE {}", create_logical_table_expr.table_name);