Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(fuzz): add create logical table target #3756

Merged
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/develop.yml
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,7 @@ jobs:
runs-on: ubuntu-latest
strategy:
matrix:
target: [ "fuzz_create_table", "fuzz_alter_table", "fuzz_create_database" ]
target: [ "fuzz_create_table", "fuzz_alter_table", "fuzz_create_database", "fuzz_create_logical_table" ]
steps:
- uses: actions/checkout@v4
- uses: arduino/setup-protoc@v3
Expand Down
7 changes: 7 additions & 0 deletions tests-fuzz/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,13 @@ test = false
bench = false
doc = false

[[bin]]
name = "fuzz_create_logical_table"
path = "targets/fuzz_create_logical_table.rs"
test = false
bench = false
doc = false

[[bin]]
name = "fuzz_insert"
path = "targets/fuzz_insert.rs"
Expand Down
207 changes: 204 additions & 3 deletions tests-fuzz/src/generator/create_expr.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
// limitations under the License.

use std::collections::HashMap;
use std::marker::PhantomData;

use datatypes::value::Value;
use derive_builder::Builder;
Expand All @@ -22,14 +23,19 @@ use rand::Rng;
use snafu::{ensure, ResultExt};

use super::Generator;
use crate::context::TableContextRef;
use crate::error::{self, Error, Result};
use crate::fake::{random_capitalize_map, MappedGenerator, WordGenerator};
use crate::fake::{
merge_two_word_map_fn, random_capitalize_map, uppercase_and_keyword_backtick_map,
MappedGenerator, WordGenerator,
};
use crate::generator::{ColumnOptionGenerator, ConcreteDataTypeGenerator, Random};
use crate::ir::create_expr::{CreateDatabaseExprBuilder, CreateTableExprBuilder};
use crate::ir::create_expr::{ColumnOption, CreateDatabaseExprBuilder, CreateTableExprBuilder};
use crate::ir::{
column_options_generator, generate_columns, generate_random_value,
partible_column_options_generator, ts_column_options_generator, ColumnTypeGenerator,
CreateDatabaseExpr, CreateTableExpr, Ident, PartibleColumnTypeGenerator, TsColumnTypeGenerator,
CreateDatabaseExpr, CreateTableExpr, Ident, PartibleColumnTypeGenerator,
StringColumnTypeGenerator, TsColumnTypeGenerator,
};

#[derive(Builder)]
Expand Down Expand Up @@ -200,6 +206,115 @@ impl<R: Rng + 'static> Generator<CreateTableExpr, R> for CreateTableExprGenerato
}
}

/// Generates `CREATE TABLE` expressions for a physical table of the `metric` engine.
///
/// The generated table has 2 columns: time index and value.
#[derive(Builder)]
#[builder(pattern = "owned")]
pub struct CreatePhysicalTableExprGenerator<R: Rng + 'static> {
// Zero-sized marker binding the generator to the RNG type `R`; no `R` value is stored.
#[builder(default)]
_phantom: PhantomData<R>,
}

impl<R: Rng + 'static> Generator<CreateTableExpr, R> for CreatePhysicalTableExprGenerator<R> {
type Error = Error;

fn generate(&self, rng: &mut R) -> Result<CreateTableExpr> {
let if_not_exists = rng.gen_bool(0.5);
WenyXu marked this conversation as resolved.
Show resolved Hide resolved

let create_physical_table_generator = CreateTableExprGeneratorBuilder::default()
.name_generator(Box::new(MappedGenerator::new(
WordGenerator,
merge_two_word_map_fn(random_capitalize_map, uppercase_and_keyword_backtick_map),
)))
.columns(2)
.engine("metric")
.if_not_exists(if_not_exists)
.with_clause([("physical_metric_table".to_string(), "".to_string())])
.build()
.unwrap();

create_physical_table_generator.generate(rng)
}
}
WenyXu marked this conversation as resolved.
Show resolved Hide resolved

/// Generate a logical table based on an existing physical table.
#[derive(Builder)]
#[builder(pattern = "owned")]
pub struct CreateLogicalTableExprGenerator<R: Rng + 'static> {
table_ctx: TableContextRef,
WenyXu marked this conversation as resolved.
Show resolved Hide resolved
labels: usize,
#[builder(default)]
_phantom: PhantomData<R>,
}

impl<R: Rng + 'static> Generator<CreateTableExpr, R> for CreateLogicalTableExprGenerator<R> {
type Error = Error;

fn generate(&self, rng: &mut R) -> Result<CreateTableExpr> {
// Currently we mock the usage of GreptimeDB as Prometheus' backend, the physical table must have two columns.
ensure!(
self.table_ctx.columns.len() == 2,
error::UnexpectedSnafu {
violated: "The physical table must have two columns"
}
);

// Generates the logical table columns based on the physical table.
let physical_table_name = self.table_ctx.name.to_string().replace('`', "");
let table_generator = CreateTableExprGeneratorBuilder::default()
.name_generator(Box::new(MappedGenerator::new(
WordGenerator,
merge_two_word_map_fn(random_capitalize_map, uppercase_and_keyword_backtick_map),
)))
.columns(self.labels)
.engine("metric")
.column_type_generator(Box::new(StringColumnTypeGenerator))
.column_options_generator(Box::new(partible_column_options_generator))
.with_clause([(
"on_physical_table".to_string(),
physical_table_name.to_string(),
)])
.build()
.unwrap();

let mut table = table_generator.generate(rng)?;
while table.table_name.value == physical_table_name {
table.table_name = table_generator.name_generator.gen(rng);
}
let logical_ts = table.columns.iter().position(|column| {
column
.options
.iter()
.any(|option| option == &ColumnOption::TimeIndex)
});
table.columns.remove(logical_ts.unwrap());
table.columns.iter_mut().for_each(|column| {
// Only keeps the primary key option for string columns.
column
.options
.retain(|option| option == &ColumnOption::PrimaryKey);
// Ensures the column name is unique.
while column.name.value == self.table_ctx.columns[0].name.value
|| column.name.value == self.table_ctx.columns[1].name.value
{
column.name = table_generator.name_generator.gen(rng);
}
});

table.columns.extend(self.table_ctx.columns.clone());

let mut primary_keys = vec![];
for (idx, column) in table.columns.iter().enumerate() {
if column.is_primary_key() {
primary_keys.push(idx);
}
}
primary_keys.shuffle(rng);
table.primary_keys = primary_keys;
WenyXu marked this conversation as resolved.
Show resolved Hide resolved

Ok(table)
}
}

#[derive(Builder)]
#[builder(default, pattern = "owned")]
pub struct CreateDatabaseExprGenerator<R: Rng + 'static> {
Expand Down Expand Up @@ -236,10 +351,14 @@ impl<R: Rng + 'static> Generator<CreateDatabaseExpr, R> for CreateDatabaseExprGe

#[cfg(test)]
mod tests {
use std::sync::Arc;

use datatypes::data_type::ConcreteDataType;
use datatypes::value::Value;
use rand::SeedableRng;

use super::*;
use crate::context::TableContext;

#[test]
fn test_float64() {
Expand Down Expand Up @@ -296,6 +415,88 @@ mod tests {
assert_eq!(expected, serialized);
}

#[test]
fn test_create_logical_table_expr_generator() {
    // Returns the name of the first column that carries the time index option;
    // panics if the expression has no such column.
    let time_index_name = |expr: &CreateTableExpr| -> String {
        expr.columns
            .iter()
            .find(|column| column.options.contains(&ColumnOption::TimeIndex))
            .unwrap()
            .name
            .value
            .to_string()
    };

    let mut rng = rand::thread_rng();

    // The physical table: metric engine, exactly two columns.
    let physical = CreatePhysicalTableExprGeneratorBuilder::default()
        .build()
        .unwrap()
        .generate(&mut rng)
        .unwrap();
    assert_eq!(physical.engine, "metric");
    assert_eq!(physical.columns.len(), 2);
    let physical_ts_name = time_index_name(&physical);

    // The logical table built on top of it.
    let logical = CreateLogicalTableExprGeneratorBuilder::default()
        .table_ctx(Arc::new(TableContext::from(&physical)))
        .labels(5)
        .build()
        .unwrap()
        .generate(&mut rng)
        .unwrap();
    let logical_ts_name = time_index_name(&logical);

    assert_eq!(logical.engine, "metric");
    assert_eq!(logical.columns.len(), 6);
    // The logical table reuses the physical table's time index column.
    assert_eq!(logical_ts_name, physical_ts_name);
    // Every string column must be part of the primary key.
    assert!(logical
        .columns
        .iter()
        .filter(|column| column.column_type == ConcreteDataType::string_datatype())
        .all(|column| column.options.contains(&ColumnOption::PrimaryKey)));
}

// Golden test: with a fixed seed, both generators must serialize to exactly the JSON
// recorded below. Any change in how the generators consume the RNG shows up here.
#[test]
fn test_create_logical_table_expr_generator_deterministic() {
// A fixed seed makes the generated expressions reproducible.
let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0);
let physical_table_expr = CreatePhysicalTableExprGeneratorBuilder::default()
.build()
.unwrap()
.generate(&mut rng)
.unwrap();
let physical_table_serialized = serde_json::to_string(&physical_table_expr).unwrap();
let physical_table_expected = r#"{"table_name":{"value":"asSumENda","quote_style":"`"},"columns":[{"name":{"value":"TOtam","quote_style":"`"},"column_type":{"Timestamp":{"Second":null}},"options":["TimeIndex"]},{"name":{"value":"quI","quote_style":"`"},"column_type":{"Float64":{}},"options":[{"DefaultValue":{"Float64":0.9184293397424537}}]}],"if_not_exists":false,"partition":null,"engine":"metric","options":{"physical_metric_table":{"String":""}},"primary_keys":[]}"#;
assert_eq!(physical_table_expected, physical_table_serialized);

let table_ctx = Arc::new(TableContext::from(&physical_table_expr));

// The same `rng` is reused, so the logical table depends on the state left behind by
// the physical table generation above.
let logical_table_expr = CreateLogicalTableExprGeneratorBuilder::default()
.table_ctx(table_ctx)
.labels(5)
.build()
.unwrap()
.generate(&mut rng)
.unwrap();

let logical_table_serialized = serde_json::to_string(&logical_table_expr).unwrap();
// Expected shape: four string label columns followed by the two physical columns.
let logical_table_expected = r#"{"table_name":{"value":"Odit","quote_style":"`"},"columns":[{"name":{"value":"FUgIat","quote_style":"`"},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"sImilIQue","quote_style":"`"},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"qui","quote_style":null},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"nECEsSItATiBuS","quote_style":"`"},"column_type":{"String":null},"options":["PrimaryKey"]},{"name":{"value":"TOtam","quote_style":"`"},"column_type":{"Timestamp":{"Second":null}},"options":["TimeIndex"]},{"name":{"value":"quI","quote_style":"`"},"column_type":{"Float64":{}},"options":[{"DefaultValue":{"Float64":0.9184293397424537}}]}],"if_not_exists":false,"partition":null,"engine":"metric","options":{"on_physical_table":{"String":"asSumENda"}},"primary_keys":[3,0,2,1]}"#;
assert_eq!(logical_table_expected, logical_table_serialized);
}

#[test]
fn test_create_database_expr_generator() {
let mut rng = rand::thread_rng();
Expand Down
15 changes: 15 additions & 0 deletions tests-fuzz/src/ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,8 @@ lazy_static! {
ConcreteDataType::date_datatype(),
ConcreteDataType::datetime_datatype(),
];
pub static ref STRING_DATA_TYPES: Vec<ConcreteDataType> =
vec![ConcreteDataType::string_datatype(),];
WenyXu marked this conversation as resolved.
Show resolved Hide resolved
}

impl_random!(ConcreteDataType, ColumnTypeGenerator, DATA_TYPES);
Expand All @@ -72,10 +74,16 @@ impl_random!(
PartibleColumnTypeGenerator,
PARTIBLE_DATA_TYPES
);
// Registers `StringColumnTypeGenerator` with the `STRING_DATA_TYPES` list.
// NOTE(review): `impl_random!` is defined outside this view; presumably it makes the
// generator pick a random `ConcreteDataType` from the given list — confirm.
impl_random!(
ConcreteDataType,
StringColumnTypeGenerator,
STRING_DATA_TYPES
);

/// Column type generator registered with `impl_random!` for `DATA_TYPES`.
pub struct ColumnTypeGenerator;
/// Column type generator for time index columns.
// NOTE(review): its `impl_random!` registration is outside this view — confirm.
pub struct TsColumnTypeGenerator;
/// Column type generator registered with `impl_random!` for `PARTIBLE_DATA_TYPES`.
pub struct PartibleColumnTypeGenerator;
/// Column type generator registered with `impl_random!` for `STRING_DATA_TYPES`,
/// which contains only the string data type.
pub struct StringColumnTypeGenerator;

/// Generates a random [Value].
pub fn generate_random_value<R: Rng>(
Expand Down Expand Up @@ -318,6 +326,13 @@ pub fn ts_column_options_generator<R: Rng + 'static>(
vec![ColumnOption::TimeIndex]
}

/// Produces the column options of a primary key column.
///
/// Both arguments are ignored: the result is always `PrimaryKey` followed by `NotNull`,
/// independent of the RNG state and of the column's data type.
pub fn primary_key_column_options_generator<R: Rng + 'static>(
    _rng: &mut R,
    _column_type: &ConcreteDataType,
) -> Vec<ColumnOption> {
    let options = [ColumnOption::PrimaryKey, ColumnOption::NotNull];
    Vec::from(options)
}

/// Generates columns with given `names`.
pub fn generate_columns<R: Rng + 'static>(
rng: &mut R,
Expand Down
Loading
Loading