From 3504d8254e459f4e01d44255322524d43f7f1937 Mon Sep 17 00:00:00 2001 From: Niwaka <61189782+NiwakaDev@users.noreply.github.com> Date: Sun, 3 Sep 2023 15:56:05 +0900 Subject: [PATCH] fix: unused table options (#2267) * fix: unused table options keys * refactor: simplify validate table options * chore: Add newlines --------- Co-authored-by: Yingwen --- Cargo.lock | 2 + src/common/datasource/src/object_store/s3.rs | 24 +++++ src/datanode/src/sql/create.rs | 3 +- src/frontend/src/statement.rs | 5 +- src/sql/Cargo.toml | 1 + src/sql/src/error.rs | 6 +- src/sql/src/parsers/create_parser.rs | 44 ++++++++-- src/sql/src/statements/create.rs | 23 +++++ src/table/Cargo.toml | 1 + src/table/src/requests.rs | 24 +++++ tests-integration/src/tests/instance_test.rs | 88 +++++++++++++++++++ .../cases/distributed/show/show_create.result | 34 ++++++- tests/cases/distributed/show/show_create.sql | 28 +++++- .../cases/standalone/show/show_create.result | 19 ++++ tests/cases/standalone/show/show_create.sql | 17 ++++ 15 files changed, 305 insertions(+), 14 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cac1497afaa7..7314b8a95e6b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -8980,6 +8980,7 @@ dependencies = [ "regex", "snafu", "sqlparser 0.34.0", + "table", ] [[package]] @@ -9458,6 +9459,7 @@ dependencies = [ "chrono", "common-base", "common-catalog", + "common-datasource", "common-error", "common-procedure", "common-query", diff --git a/src/common/datasource/src/object_store/s3.rs b/src/common/datasource/src/object_store/s3.rs index 2b7fedf041a6..87975280879d 100644 --- a/src/common/datasource/src/object_store/s3.rs +++ b/src/common/datasource/src/object_store/s3.rs @@ -27,6 +27,15 @@ const SESSION_TOKEN: &str = "session_token"; const REGION: &str = "region"; const ENABLE_VIRTUAL_HOST_STYLE: &str = "enable_virtual_host_style"; +pub fn is_supported_in_s3(key: &str) -> bool { + key == ENDPOINT + || key == ACCESS_KEY_ID + || key == SECRET_ACCESS_KEY + || key == SESSION_TOKEN + || key == REGION + || key == ENABLE_VIRTUAL_HOST_STYLE +} + pub fn build_s3_backend( host: &str, path: &str, @@ -75,3 +84,18 @@ pub fn build_s3_backend( .context(error::BuildBackendSnafu)? .finish()) } + +#[cfg(test)] +mod tests { + use super::*; + #[test] + fn test_is_supported_in_s3() { + assert!(is_supported_in_s3(ENDPOINT)); + assert!(is_supported_in_s3(ACCESS_KEY_ID)); + assert!(is_supported_in_s3(SECRET_ACCESS_KEY)); + assert!(is_supported_in_s3(SESSION_TOKEN)); + assert!(is_supported_in_s3(REGION)); + assert!(is_supported_in_s3(ENABLE_VIRTUAL_HOST_STYLE)); + assert!(!is_supported_in_s3("foo")) + } +} diff --git a/src/datanode/src/sql/create.rs b/src/datanode/src/sql/create.rs index 2665dcc87fa6..573ed6bdedc8 100644 --- a/src/datanode/src/sql/create.rs +++ b/src/datanode/src/sql/create.rs @@ -279,7 +279,7 @@ mod tests { "timestamp" timestamp TIME INDEX, "value" DOUBLE, host STRING PRIMARY KEY - ) engine=mito with(regions=1, ttl='7days',write_buffer_size='32MB',some='other');"#; + ) engine=mito with(regions=1, ttl='7days',write_buffer_size='32MB');"#; let parsed_stmt = sql_to_statement(sql); let c = SqlHandler::create_to_request(42, parsed_stmt, &TableReference::bare("demo_table")) .unwrap(); @@ -289,7 +289,6 @@ mod tests { Some(ReadableSize::mb(32)), c.table_options.write_buffer_size ); - assert_eq!("other", c.table_options.extra_options.get("some").unwrap()); } #[tokio::test] diff --git a/src/frontend/src/statement.rs b/src/frontend/src/statement.rs index 3deadcf8649e..1105ac9299ea 100644 --- a/src/frontend/src/statement.rs +++ b/src/frontend/src/statement.rs @@ -120,10 +120,9 @@ impl StatementExecutor { self.copy_database(to_copy_database_request(arg, &query_ctx)?) .await } - - Statement::CreateDatabase(_) - | Statement::CreateTable(_) + Statement::CreateTable(_) | Statement::CreateExternalTable(_) + | Statement::CreateDatabase(_) | Statement::Alter(_) | Statement::DropTable(_) | Statement::TruncateTable(_) diff --git a/src/sql/Cargo.toml b/src/sql/Cargo.toml index ee85f93616dd..5d271470aa12 100644 --- a/src/sql/Cargo.toml +++ b/src/sql/Cargo.toml @@ -19,6 +19,7 @@ once_cell.workspace = true regex.workspace = true snafu = { version = "0.7", features = ["backtraces"] } sqlparser.workspace = true +table = { workspace = true } [dev-dependencies] common-datasource = { workspace = true } diff --git a/src/sql/src/error.rs b/src/sql/src/error.rs index 1ced9be3cef5..f5c375ea8f44 100644 --- a/src/sql/src/error.rs +++ b/src/sql/src/error.rs @@ -103,6 +103,9 @@ pub enum Error { source: datatypes::error::Error, }, + #[snafu(display("Invalid table option key: {}", key))] + InvalidTableOption { key: String, location: Location }, + #[snafu(display("Failed to serialize column default constraint, source: {}", source))] SerializeColumnDefaultConstraint { location: Location, @@ -168,7 +171,8 @@ impl ErrorExt for Error { | ColumnTypeMismatch { .. } | InvalidTableName { .. } | InvalidSqlValue { .. } - | TimestampOverflow { .. } => StatusCode::InvalidArguments, + | TimestampOverflow { .. } + | InvalidTableOption { .. } => StatusCode::InvalidArguments, SerializeColumnDefaultConstraint { source, .. } => source.status_code(), ConvertToGrpcDataType { source, .. } => source.status_code(), diff --git a/src/sql/src/parsers/create_parser.rs b/src/sql/src/parsers/create_parser.rs index 84ae1c87d6ae..71b0c3786918 100644 --- a/src/sql/src/parsers/create_parser.rs +++ b/src/sql/src/parsers/create_parser.rs @@ -13,6 +13,7 @@ // limitations under the License. use std::cmp::Ordering; +use std::collections::HashMap; use common_catalog::consts::default_engine; use itertools::Itertools; @@ -24,11 +25,12 @@ use sqlparser::keywords::ALL_KEYWORDS; use sqlparser::parser::IsOptional::Mandatory; use sqlparser::parser::{Parser, ParserError}; use sqlparser::tokenizer::{Token, TokenWithLocation, Word}; +use table::requests::valid_table_option; use crate::ast::{ColumnDef, Ident, TableConstraint, Value as SqlValue}; use crate::error::{ - self, InvalidColumnOptionSnafu, InvalidTimeIndexSnafu, MissingTimeIndexSnafu, Result, - SyntaxSnafu, + self, InvalidColumnOptionSnafu, InvalidTableOptionSnafu, InvalidTimeIndexSnafu, + MissingTimeIndexSnafu, Result, SyntaxSnafu, }; use crate::parser::ParserContext; use crate::statements::create::{ @@ -91,8 +93,15 @@ impl<'a> ParserContext<'a> { None } }) - .collect(); - + .collect::>(); + for key in options.keys() { + ensure!( + valid_table_option(key), + InvalidTableOptionSnafu { + key: key.to_string() + } + ); + } Ok(Statement::CreateExternalTable(CreateExternalTable { name: table_name, columns, @@ -149,7 +158,14 @@ impl<'a> ParserContext<'a> { .parser .parse_options(Keyword::WITH) .context(error::SyntaxSnafu { sql: self.sql })?; - + for option in options.iter() { + ensure!( + valid_table_option(&option.name.value), + InvalidTableOptionSnafu { + key: option.name.value.to_string() + } + ); + } let create_table = CreateTable { if_not_exists, name: table_name, @@ -807,6 +823,24 @@ mod tests { use super::*; use crate::dialect::GreptimeDbDialect; + #[test] + fn test_validate_external_table_options() { + let sql = "CREATE EXTERNAL TABLE city ( + host string, + ts int64, + cpu float64 default 0, + memory float64, + TIME INDEX (ts), + PRIMARY KEY(ts, host) + ) with(location='/var/data/city.csv',format='csv',foo='bar');"; + + let result = ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}); + assert!(matches!( + result, + Err(error::Error::InvalidTableOption { .. }) + )); + } + #[test] fn test_parse_create_external_table() { struct Test<'a> { diff --git a/src/sql/src/statements/create.rs b/src/sql/src/statements/create.rs index 06bf185371c8..3bf1fe260b8f 100644 --- a/src/sql/src/statements/create.rs +++ b/src/sql/src/statements/create.rs @@ -222,6 +222,7 @@ pub struct CreateExternalTable { #[cfg(test)] mod tests { use crate::dialect::GreptimeDbDialect; + use crate::error::Error::InvalidTableOption; use crate::parser::ParserContext; use crate::statements::statement::Statement; @@ -319,4 +320,26 @@ ENGINE=mito _ => unreachable!(), } } + + #[test] + fn test_validate_table_options() { + let sql = r"create table if not exists demo( + host string, + ts bigint, + cpu double default 0, + memory double, + TIME INDEX (ts), + PRIMARY KEY(ts, host) + ) + PARTITION BY RANGE COLUMNS (ts) ( + PARTITION r0 VALUES LESS THAN (5), + PARTITION r1 VALUES LESS THAN (9), + PARTITION r2 VALUES LESS THAN (MAXVALUE), + ) + engine=mito + with(regions=1, ttl='7d', hello='world'); +"; + let result = ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}); + assert!(matches!(result, Err(InvalidTableOption { .. }))) + } } diff --git a/src/table/Cargo.toml b/src/table/Cargo.toml index 168548b9ad92..245ef06e25dd 100644 --- a/src/table/Cargo.toml +++ b/src/table/Cargo.toml @@ -13,6 +13,7 @@ async-trait = "0.1" chrono.workspace = true common-base = { workspace = true } common-catalog = { workspace = true } +common-datasource = { workspace = true } common-error = { workspace = true } common-procedure = { workspace = true } common-query = { workspace = true } diff --git a/src/table/src/requests.rs b/src/table/src/requests.rs index 7ca269c68d82..5407cbac3959 100644 --- a/src/table/src/requests.rs +++ b/src/table/src/requests.rs @@ -19,6 +19,7 @@ use std::str::FromStr; use std::time::Duration; use common_base::readable_size::ReadableSize; +use common_datasource::object_store::s3::is_supported_in_s3; use common_query::AddColumnLocation; use common_time::range::TimestampRange; use datatypes::prelude::VectorRef; @@ -332,6 +333,18 @@ macro_rules! meter_insert_request { }; } +pub fn valid_table_option(key: &str) -> bool { + matches!( + key, + IMMUTABLE_TABLE_LOCATION_KEY + | IMMUTABLE_TABLE_FORMAT_KEY + | IMMUTABLE_TABLE_PATTERN_KEY + | WRITE_BUFFER_SIZE_KEY + | TTL_KEY + | REGIONS_KEY + ) | is_supported_in_s3(key) +} + #[derive(Debug, Clone, Default, Deserialize, Serialize)] pub struct CopyDatabaseRequest { pub catalog_name: String, @@ -346,6 +359,17 @@ pub struct CopyDatabaseRequest { mod tests { use super::*; + #[test] + fn test_validate_table_option() { + assert!(valid_table_option(IMMUTABLE_TABLE_LOCATION_KEY)); + assert!(valid_table_option(IMMUTABLE_TABLE_FORMAT_KEY)); + assert!(valid_table_option(IMMUTABLE_TABLE_PATTERN_KEY)); + assert!(valid_table_option(TTL_KEY)); + assert!(valid_table_option(REGIONS_KEY)); + assert!(valid_table_option(WRITE_BUFFER_SIZE_KEY)); + assert!(!valid_table_option("foo")); + } + #[test] fn test_serialize_table_options() { let options = TableOptions { diff --git a/tests-integration/src/tests/instance_test.rs b/tests-integration/src/tests/instance_test.rs index cc42e82fbe6f..c4bceefaf25f 100644 --- a/tests-integration/src/tests/instance_test.rs +++ b/tests-integration/src/tests/instance_test.rs @@ -153,6 +153,94 @@ PARTITION BY RANGE COLUMNS (ts) ( check_output_stream(output, expected).await; } +#[apply(both_instances_cases)] +async fn test_validate_external_table_options(instance: Arc) { + let frontend = instance.frontend(); + let format = "json"; + let location = find_testing_resource("/tests/data/json/various_type.json"); + let table_name = "various_type_json_with_schema"; + let sql = &format!( + r#"CREATE EXTERNAL TABLE {table_name} ( + a BIGINT NULL, + b DOUBLE NULL, + c BOOLEAN NULL, + d STRING NULL, + e TIMESTAMP(0) NULL, + f DOUBLE NULL, + g TIMESTAMP(0) NULL, + ) WITH (foo='bar', location='{location}', format='{format}');"#, + ); + + let result = try_execute_sql(&frontend, sql).await; + assert!(matches!(result, Err(Error::ParseSql { .. }))); +} + +#[apply(both_instances_cases)] +async fn test_show_create_external_table(instance: Arc) { + let fe_instance = instance.frontend(); + let format = "csv"; + let location = find_testing_resource("/tests/data/csv/various_type.csv"); + let table_name = "various_type_csv"; + + let output = execute_sql( + &fe_instance, + &format!( + r#"create external table {table_name} with (location='{location}', format='{format}');"#, + ), + ) + .await; + assert!(matches!(output, Output::AffectedRows(0))); + + let output = execute_sql(&fe_instance, &format!("show create table {table_name};")).await; + + let Output::RecordBatches(record_batches) = output else { + unreachable!() + }; + + // We can't directly test `show create table` by check_output_stream because the location name length depends on the current filesystem. + let record_batches = record_batches.iter().collect::>(); + let column = record_batches[0].column_by_name("Create Table").unwrap(); + let actual = column.get(0); + let expect = if instance.is_distributed_mode() { + format!( + r#"CREATE EXTERNAL TABLE IF NOT EXISTS "various_type_csv" ( + "c_int" BIGINT NULL, + "c_float" DOUBLE NULL, + "c_string" DOUBLE NULL, + "c_bool" BOOLEAN NULL, + "c_date" DATE NULL, + "c_datetime" TIMESTAMP(0) NULL, + +) + +ENGINE=file +WITH( + format = 'csv', + location = '{location}', + regions = 1 +)"# + ) + } else { + format!( + r#"CREATE EXTERNAL TABLE IF NOT EXISTS "various_type_csv" ( + "c_int" BIGINT NULL, + "c_float" DOUBLE NULL, + "c_string" DOUBLE NULL, + "c_bool" BOOLEAN NULL, + "c_date" DATE NULL, + "c_datetime" TIMESTAMP(0) NULL, + +) +ENGINE=file +WITH( + format = 'csv', + location = '{location}' +)"# + ) + }; + assert_eq!(actual.to_string(), expect); +} + #[apply(both_instances_cases)] async fn test_issue477_same_table_name_in_different_databases(instance: Arc) { let instance = instance.frontend(); diff --git a/tests/cases/distributed/show/show_create.result b/tests/cases/distributed/show/show_create.result index 2789ae27cc2b..a90dbd0914d8 100644 --- a/tests/cases/distributed/show/show_create.result +++ b/tests/cases/distributed/show/show_create.result @@ -13,7 +13,11 @@ PARTITION BY RANGE COLUMNS (n) ( PARTITION r1 VALUES LESS THAN (9), PARTITION r2 VALUES LESS THAN (MAXVALUE), ) -ENGINE=mito; +ENGINE=mito +WITH( + ttl = '7d', + write_buffer_size = 1024 +); Affected Rows: 0 @@ -39,7 +43,9 @@ SHOW CREATE TABLE system_metrics; | | ) | | | ENGINE=mito | | | WITH( | -| | regions = 3 | +| | regions = 3, | +| | ttl = '7days', | +| | write_buffer_size = '1.0KiB' | | | ) | +----------------+-----------------------------------------------------------+ @@ -73,3 +79,27 @@ drop table table_without_partition; Affected Rows: 1 +CREATE TABLE not_supported_table_options_keys ( + id INT UNSIGNED, + host STRING, + cpu DOUBLE, + disk FLOAT, + n INT COMMENT 'range key', + ts TIMESTAMP NOT NULL DEFAULT current_timestamp(), + TIME INDEX (ts), + PRIMARY KEY (id, host) +) +PARTITION BY RANGE COLUMNS (n) ( + PARTITION r0 VALUES LESS THAN (5), + PARTITION r1 VALUES LESS THAN (9), + PARTITION r2 VALUES LESS THAN (MAXVALUE), +) +ENGINE=mito +WITH( + foo = 123, + ttl = '7d', + write_buffer_size = 1024 +); + +Error: 1004(InvalidArguments), Invalid table option key: foo + diff --git a/tests/cases/distributed/show/show_create.sql b/tests/cases/distributed/show/show_create.sql index cdb6444eeefe..9435f8729653 100644 --- a/tests/cases/distributed/show/show_create.sql +++ b/tests/cases/distributed/show/show_create.sql @@ -13,7 +13,11 @@ PARTITION BY RANGE COLUMNS (n) ( PARTITION r1 VALUES LESS THAN (9), PARTITION r2 VALUES LESS THAN (MAXVALUE), ) -ENGINE=mito; +ENGINE=mito +WITH( + ttl = '7d', + write_buffer_size = 1024 +); SHOW CREATE TABLE system_metrics; @@ -26,3 +30,25 @@ create table table_without_partition ( show create table table_without_partition; drop table table_without_partition; + +CREATE TABLE not_supported_table_options_keys ( + id INT UNSIGNED, + host STRING, + cpu DOUBLE, + disk FLOAT, + n INT COMMENT 'range key', + ts TIMESTAMP NOT NULL DEFAULT current_timestamp(), + TIME INDEX (ts), + PRIMARY KEY (id, host) +) +PARTITION BY RANGE COLUMNS (n) ( + PARTITION r0 VALUES LESS THAN (5), + PARTITION r1 VALUES LESS THAN (9), + PARTITION r2 VALUES LESS THAN (MAXVALUE), +) +ENGINE=mito +WITH( + foo = 123, + ttl = '7d', + write_buffer_size = 1024 +); diff --git a/tests/cases/standalone/show/show_create.result b/tests/cases/standalone/show/show_create.result index d370d2646da4..9c764675fbc2 100644 --- a/tests/cases/standalone/show/show_create.result +++ b/tests/cases/standalone/show/show_create.result @@ -41,3 +41,22 @@ DROP TABLE system_metrics; Affected Rows: 1 +CREATE TABLE not_supported_table_options_keys ( + id INT UNSIGNED, + host STRING, + cpu DOUBLE, + disk FLOAT, + n INT COMMENT 'range key', + ts TIMESTAMP NOT NULL DEFAULT current_timestamp(), + TIME INDEX (ts), + PRIMARY KEY (id, host) +) +ENGINE=mito +WITH( + foo = 123, + ttl = '7d', + write_buffer_size = 1024 +); + +Error: 1004(InvalidArguments), Invalid table option key: foo + diff --git a/tests/cases/standalone/show/show_create.sql b/tests/cases/standalone/show/show_create.sql index bebbd46b752b..86faf1a604aa 100644 --- a/tests/cases/standalone/show/show_create.sql +++ b/tests/cases/standalone/show/show_create.sql @@ -16,3 +16,20 @@ WITH( SHOW CREATE TABLE system_metrics; DROP TABLE system_metrics; + +CREATE TABLE not_supported_table_options_keys ( + id INT UNSIGNED, + host STRING, + cpu DOUBLE, + disk FLOAT, + n INT COMMENT 'range key', + ts TIMESTAMP NOT NULL DEFAULT current_timestamp(), + TIME INDEX (ts), + PRIMARY KEY (id, host) +) +ENGINE=mito +WITH( + foo = 123, + ttl = '7d', + write_buffer_size = 1024 +);