Skip to content

Commit

Permalink
feat: add validate method to CreateExpr (#3772)
Browse files Browse the repository at this point in the history
* feat: add validate method to CreateExpr

Signed-off-by: Ruihang Xia <[email protected]>

* add sqlness reproducer

Signed-off-by: Ruihang Xia <[email protected]>

* verify region create request

Signed-off-by: Ruihang Xia <[email protected]>

* fix existing test

Signed-off-by: Ruihang Xia <[email protected]>

* add tailing empty line

Signed-off-by: Ruihang Xia <[email protected]>

* add more validation

Signed-off-by: Ruihang Xia <[email protected]>

* fix typo

Signed-off-by: Ruihang Xia <[email protected]>

* disable metric table fuzz

Signed-off-by: Ruihang Xia <[email protected]>

* minor refactor

Signed-off-by: Ruihang Xia <[email protected]>

---------

Signed-off-by: Ruihang Xia <[email protected]>
  • Loading branch information
waynexia authored Apr 24, 2024
1 parent 659d34a commit df01ac0
Show file tree
Hide file tree
Showing 9 changed files with 345 additions and 85 deletions.
5 changes: 3 additions & 2 deletions src/metric-engine/src/data_region.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use store_api::region_request::{
AddColumn, AffectedRows, AlterKind, RegionAlterRequest, RegionPutRequest, RegionRequest,
};
use store_api::storage::consts::ReservedColumnId;
use store_api::storage::RegionId;
use store_api::storage::{ConcreteDataType, RegionId};

use crate::error::{
ColumnTypeMismatchSnafu, MitoReadOperationSnafu, MitoWriteOperationSnafu, Result,
Expand Down Expand Up @@ -128,7 +128,8 @@ impl DataRegion {
if c.semantic_type == SemanticType::Tag {
if !c.column_schema.data_type.is_string() {
return ColumnTypeMismatchSnafu {
column_type: c.column_schema.data_type.clone(),
expect: ConcreteDataType::string_datatype(),
actual: c.column_schema.data_type.clone(),
}
.fail();
}
Expand Down
98 changes: 84 additions & 14 deletions src/metric-engine/src/engine/create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,9 +43,11 @@ use crate::engine::options::{
};
use crate::engine::MetricEngineInner;
use crate::error::{
ColumnNotFoundSnafu, ConflictRegionOptionSnafu, CreateMitoRegionSnafu,
InternalColumnOccupiedSnafu, MissingRegionOptionSnafu, MitoReadOperationSnafu,
ParseRegionIdSnafu, PhysicalRegionNotFoundSnafu, Result, SerializeColumnMetadataSnafu,
AddingFieldColumnSnafu, ColumnNotFoundSnafu, ColumnTypeMismatchSnafu,
ConflictRegionOptionSnafu, CreateMitoRegionSnafu, InternalColumnOccupiedSnafu,
InvalidMetadataSnafu, MissingRegionOptionSnafu, MitoReadOperationSnafu,
MultipleFieldColumnSnafu, NoFieldColumnSnafu, ParseRegionIdSnafu, PhysicalRegionNotFoundSnafu,
Result, SerializeColumnMetadataSnafu,
};
use crate::metrics::{LOGICAL_REGION_COUNT, PHYSICAL_COLUMN_COUNT, PHYSICAL_REGION_COUNT};
use crate::utils::{to_data_region_id, to_metadata_region_id};
Expand Down Expand Up @@ -191,6 +193,14 @@ impl MetricEngineInner {
})?;
for col in &request.column_metadatas {
if !physical_columns.contains(&col.column_schema.name) {
// Multi-field on physical table is explicit forbidden at present
// TODO(ruihang): support multi-field on both logical and physical column
ensure!(
col.semantic_type != SemanticType::Field,
AddingFieldColumnSnafu {
name: col.column_schema.name.clone()
}
);
new_columns.push(col.clone());
} else {
existing_columns.push(col.column_schema.name.clone());
Expand Down Expand Up @@ -290,6 +300,8 @@ impl MetricEngineInner {
/// - required table option is present ([PHYSICAL_TABLE_METADATA_KEY] or
/// [LOGICAL_TABLE_METADATA_KEY])
fn verify_region_create_request(request: &RegionCreateRequest) -> Result<()> {
request.validate().context(InvalidMetadataSnafu)?;

let name_to_index = request
.column_metadatas
.iter()
Expand Down Expand Up @@ -323,6 +335,41 @@ impl MetricEngineInner {
ConflictRegionOptionSnafu {}
);

// check if only one field column is declared, and all tag columns are string
let mut field_col: Option<&ColumnMetadata> = None;
for col in &request.column_metadatas {
match col.semantic_type {
SemanticType::Tag => ensure!(
col.column_schema.data_type == ConcreteDataType::string_datatype(),
ColumnTypeMismatchSnafu {
expect: ConcreteDataType::string_datatype(),
actual: col.column_schema.data_type.clone(),
}
),
SemanticType::Field => {
if field_col.is_some() {
MultipleFieldColumnSnafu {
previous: field_col.unwrap().column_schema.name.clone(),
current: col.column_schema.name.clone(),
}
.fail()?;
}
field_col = Some(col)
}
SemanticType::Timestamp => {}
}
}
let field_col = field_col.context(NoFieldColumnSnafu)?;

// make sure the field column is float64 type
ensure!(
field_col.column_schema.data_type == ConcreteDataType::float64_datatype(),
ColumnTypeMismatchSnafu {
expect: ConcreteDataType::float64_datatype(),
actual: field_col.column_schema.data_type.clone(),
}
);

Ok(())
}

Expand Down Expand Up @@ -531,6 +578,15 @@ mod test {
false,
),
},
ColumnMetadata {
column_id: 2,
semantic_type: SemanticType::Field,
column_schema: ColumnSchema::new(
"column2".to_string(),
ConcreteDataType::float64_datatype(),
false,
),
},
],
region_dir: "test_dir".to_string(),
engine: METRIC_ENGINE_NAME.to_string(),
Expand All @@ -539,37 +595,51 @@ mod test {
.into_iter()
.collect(),
};
let result = MetricEngineInner::verify_region_create_request(&request);
assert!(result.is_ok());
MetricEngineInner::verify_region_create_request(&request).unwrap();
}

#[test]
fn test_verify_region_create_request_options() {
let mut request = RegionCreateRequest {
column_metadatas: vec![],
column_metadatas: vec![
ColumnMetadata {
column_id: 0,
semantic_type: SemanticType::Timestamp,
column_schema: ColumnSchema::new(
METADATA_SCHEMA_TIMESTAMP_COLUMN_NAME,
ConcreteDataType::timestamp_millisecond_datatype(),
false,
),
},
ColumnMetadata {
column_id: 1,
semantic_type: SemanticType::Field,
column_schema: ColumnSchema::new(
"val".to_string(),
ConcreteDataType::float64_datatype(),
false,
),
},
],
region_dir: "test_dir".to_string(),
engine: METRIC_ENGINE_NAME.to_string(),
primary_key: vec![],
options: HashMap::new(),
};
let result = MetricEngineInner::verify_region_create_request(&request);
assert!(result.is_err());
MetricEngineInner::verify_region_create_request(&request).unwrap_err();

let mut options = HashMap::new();
options.insert(PHYSICAL_TABLE_METADATA_KEY.to_string(), "value".to_string());
request.options.clone_from(&options);
let result = MetricEngineInner::verify_region_create_request(&request);
assert!(result.is_ok());
MetricEngineInner::verify_region_create_request(&request).unwrap();

options.insert(LOGICAL_TABLE_METADATA_KEY.to_string(), "value".to_string());
request.options.clone_from(&options);
let result = MetricEngineInner::verify_region_create_request(&request);
assert!(result.is_err());
MetricEngineInner::verify_region_create_request(&request).unwrap_err();

options.remove(PHYSICAL_TABLE_METADATA_KEY).unwrap();
request.options = options;
let result = MetricEngineInner::verify_region_create_request(&request);
assert!(result.is_ok());
MetricEngineInner::verify_region_create_request(&request).unwrap();
}

#[tokio::test]
Expand Down
23 changes: 20 additions & 3 deletions src/metric-engine/src/error.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,9 +133,10 @@ pub enum Error {
location: Location,
},

#[snafu(display("Column type mismatch. Expect string, got {:?}", column_type))]
#[snafu(display("Column type mismatch. Expect {:?}, got {:?}", expect, actual))]
ColumnTypeMismatch {
column_type: ConcreteDataType,
expect: ConcreteDataType,
actual: ConcreteDataType,
location: Location,
},

Expand Down Expand Up @@ -169,6 +170,19 @@ pub enum Error {
request: RegionRequest,
location: Location,
},

#[snafu(display("Multiple field column found: {} and {}", previous, current))]
MultipleFieldColumn {
previous: String,
current: String,
location: Location,
},

#[snafu(display("Adding field column {} to physical table", name))]
AddingFieldColumn { name: String, location: Location },

#[snafu(display("No field column found"))]
NoFieldColumn { location: Location },
}

pub type Result<T, E = Error> = std::result::Result<T, E>;
Expand All @@ -182,7 +196,10 @@ impl ErrorExt for Error {
| MissingRegionOption { .. }
| ConflictRegionOption { .. }
| ColumnTypeMismatch { .. }
| PhysicalRegionBusy { .. } => StatusCode::InvalidArguments,
| PhysicalRegionBusy { .. }
| MultipleFieldColumn { .. }
| NoFieldColumn { .. }
| AddingFieldColumn { .. } => StatusCode::InvalidArguments,

ForbiddenPhysicalAlter { .. } | UnsupportedRegionRequest { .. } => {
StatusCode::Unsupported
Expand Down
4 changes: 2 additions & 2 deletions src/metric-engine/src/test_util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -210,9 +210,9 @@ pub fn create_logical_region_request(
),
},
];
for tag in tags {
for (bias, tag) in tags.iter().enumerate() {
column_metadatas.push(ColumnMetadata {
column_id: 2,
column_id: 2 + bias as ColumnId,
semantic_type: SemanticType::Tag,
column_schema: ColumnSchema::new(
tag.to_string(),
Expand Down
94 changes: 92 additions & 2 deletions src/operator/src/expr_factory.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// See the License for the specific language governing permissions and
// limitations under the License.

use std::collections::HashMap;
use std::collections::{HashMap, HashSet};

use api::helper::ColumnDataTypeWrapper;
use api::v1::alter_expr::Kind;
Expand All @@ -31,7 +31,7 @@ use query::sql::{
};
use session::context::QueryContextRef;
use session::table_name::table_idents_to_full_name;
use snafu::{ensure, ResultExt};
use snafu::{ensure, OptionExt, ResultExt};
use sql::ast::{ColumnDef, ColumnOption, TableConstraint};
use sql::statements::alter::{AlterTable, AlterTableOperation};
use sql::statements::create::{CreateExternalTable, CreateTable, TIME_INDEX};
Expand Down Expand Up @@ -214,9 +214,72 @@ pub fn create_to_expr(create: &CreateTable, query_ctx: QueryContextRef) -> Resul
table_id: None,
engine: create.engine.to_string(),
};

validate_create_expr(&expr)?;
Ok(expr)
}

/// Validate the [`CreateTableExpr`] request.
pub fn validate_create_expr(create: &CreateTableExpr) -> Result<()> {
// construct column list
let mut column_to_indices = HashMap::with_capacity(create.column_defs.len());
for (idx, column) in create.column_defs.iter().enumerate() {
if let Some(indices) = column_to_indices.get(&column.name) {
return InvalidSqlSnafu {
err_msg: format!(
"column name `{}` is duplicated at index {} and {}",
column.name, indices, idx
),
}
.fail();
}
column_to_indices.insert(&column.name, idx);
}

// verify time_index exists
let _ = column_to_indices
.get(&create.time_index)
.with_context(|| InvalidSqlSnafu {
err_msg: format!(
"column name `{}` is not found in column list",
create.time_index
),
})?;

// verify primary_key exists
for pk in &create.primary_keys {
let _ = column_to_indices
.get(&pk)
.with_context(|| InvalidSqlSnafu {
err_msg: format!("column name `{}` is not found in column list", pk),
})?;
}

// construct primary_key set
let mut pk_set = HashSet::new();
for pk in &create.primary_keys {
if !pk_set.insert(pk) {
return InvalidSqlSnafu {
err_msg: format!("column name `{}` is duplicated in primary keys", pk),
}
.fail();
}
}

// verify time index is not primary key
if pk_set.contains(&create.time_index) {
return InvalidSqlSnafu {
err_msg: format!(
"column name `{}` is both primary key and time index",
create.time_index
),
}
.fail();
}

Ok(())
}

fn find_primary_keys(
columns: &[ColumnDef],
constraints: &[TableConstraint],
Expand Down Expand Up @@ -457,6 +520,33 @@ mod tests {
);
}

#[test]
fn test_invalid_create_to_expr() {
let cases = [
// duplicate column declaration
"CREATE TABLE monitor (host STRING primary key, ts TIMESTAMP TIME INDEX, some_column text, some_column string);",
// duplicate primary key
"CREATE TABLE monitor (host STRING, ts TIMESTAMP TIME INDEX, some_column STRING, PRIMARY KEY (some_column, host, some_column));",
// time index is primary key
"CREATE TABLE monitor (host STRING, ts TIMESTAMP TIME INDEX, PRIMARY KEY (host, ts));"
];

for sql in cases {
let stmt = ParserContext::create_with_dialect(
sql,
&GreptimeDbDialect {},
ParseOptions::default(),
)
.unwrap()
.pop()
.unwrap();
let Statement::CreateTable(create_table) = stmt else {
unreachable!()
};
create_to_expr(&create_table, QueryContext::arc()).unwrap_err();
}
}

#[test]
fn test_create_to_expr_with_default_timestamp_value() {
let sql = "CREATE TABLE monitor (v double,ts TIMESTAMP default '2024-01-30T00:01:01',TIME INDEX (ts)) engine=mito;";
Expand Down
Loading

0 comments on commit df01ac0

Please sign in to comment.