From 00308218b37889fa21af0960e56b134738f57fa3 Mon Sep 17 00:00:00 2001 From: Zhenchi Date: Tue, 9 Jul 2024 04:18:48 +0800 Subject: [PATCH] feat(fulltext_index): allow enable full-text index in SQL and gRPC way (#4310) * feat(fulltext_index): allow enable full-text index in SQL and gRPC way Signed-off-by: Zhenchi * fix: typo Signed-off-by: Zhenchi * chore: polish Signed-off-by: Zhenchi * fix: test_fulltext_intm_path Signed-off-by: Zhenchi * address comments Signed-off-by: Zhenchi * refactor: explicitly build column options Signed-off-by: Zhenchi * test: fix error msg Signed-off-by: Zhenchi * fix: address comments Signed-off-by: Zhenchi * fix: polish Signed-off-by: Zhenchi --------- Signed-off-by: Zhenchi --- Cargo.lock | 23 +- Cargo.toml | 2 +- src/api/Cargo.toml | 1 + src/api/src/error.rs | 12 +- src/api/src/helper.rs | 1 + src/api/src/lib.rs | 2 + src/api/src/v1/column_def.rs | 160 +++++++- src/common/grpc-expr/Cargo.toml | 1 + src/common/grpc-expr/src/error.rs | 24 ++ src/common/grpc-expr/src/insert.rs | 1 + src/common/grpc-expr/src/util.rs | 28 +- .../meta/src/ddl/create_table_template.rs | 1 + src/common/meta/src/ddl/test_util/columns.rs | 1 + src/datatypes/src/schema/column_schema.rs | 22 +- src/flow/src/adapter/util.rs | 2 + src/metric-engine/src/engine/put.rs | 2 + src/metric-engine/src/test_util.rs | 3 + src/mito2/src/request.rs | 2 + src/mito2/src/sst/index.rs | 20 +- src/mito2/src/test_util.rs | 2 + src/operator/src/expr_factory.rs | 44 +-- src/operator/src/insert.rs | 2 + src/operator/src/req_convert/common.rs | 11 + .../src/req_convert/insert/stmt_to_region.rs | 2 + .../transform/transformer/greptime/coerce.rs | 19 +- src/pipeline/src/manager/table.rs | 5 + src/pipeline/tests/gsub.rs | 1 + src/pipeline/tests/join.rs | 2 + src/pipeline/tests/on_failure.rs | 9 + src/query/src/error.rs | 8 + src/query/src/sql/show_create_table.rs | 61 ++- src/script/src/table.rs | 1 + src/servers/src/prom_row_builder.rs | 3 + src/sql/Cargo.toml | 1 + src/sql/src/error.rs | 19 +- src/sql/src/lib.rs | 4 +- src/sql/src/parser.rs | 2 +- src/sql/src/parsers/create_parser.rs | 351 ++++++++++++++---- src/sql/src/statements.rs | 158 +++++--- src/sql/src/statements/create.rs | 101 ++++- .../src/statements/transform/type_alias.rs | 6 +- src/store-api/src/metadata.rs | 42 +-- .../common/create/create_with_fulltext.result | 101 +++++ .../common/create/create_with_fulltext.sql | 45 +++ 44 files changed, 1053 insertions(+), 255 deletions(-) create mode 100644 tests/cases/standalone/common/create/create_with_fulltext.result create mode 100644 tests/cases/standalone/common/create/create_with_fulltext.sql diff --git a/Cargo.lock b/Cargo.lock index 580db0079e78..d7c18b0695cf 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -225,6 +225,7 @@ dependencies = [ "greptime-proto", "paste", "prost 0.12.6", + "serde_json", "snafu 0.8.3", "tonic-build 0.9.2", ] @@ -979,7 +980,7 @@ dependencies = [ "bitflags 2.5.0", "cexpr", "clang-sys", - "itertools 0.12.1", + "itertools 0.10.5", "lazy_static", "lazycell", "proc-macro2", @@ -2013,6 +2014,7 @@ dependencies = [ "common-time", "datatypes", "paste", + "prost 0.12.6", "snafu 0.8.3", "table", ] @@ -4201,7 +4203,7 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "greptime-proto" version = "0.1.0" -source = "git+https://github.com/GreptimeTeam/greptime-proto.git?rev=a70a6af9c69e40f9a918936a48717343402b4393#a70a6af9c69e40f9a918936a48717343402b4393" +source = "git+https://github.com/zhongzc/greptime-proto.git?branch=zhongzc/fulltext-options#6923c24096f9e8dedca9dff38a3c343c1a7cfc0c" dependencies = [ "prost 0.12.6", "serde", @@ -4656,7 +4658,7 @@ dependencies = [ "httpdate", "itoa", "pin-project-lite", - "socket2 0.4.10", + "socket2 0.5.7", "tokio", "tower-service", "tracing", @@ -6383,12 +6385,6 @@ version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5ce46fe64a9d73be07dcbe690a38ce1b293be448fd8ce1e6c1b8062c9f72c6a" -[[package]] -name = "multimap" -version = "0.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "defc4c55412d89136f966bbb339008b474350e5e6e78d2714439c386b3137a03" - [[package]] name = "mur3" version = "0.1.0" @@ -8220,7 +8216,7 @@ dependencies = [ "itertools 0.10.5", "lazy_static", "log", - "multimap 0.8.3", + "multimap", "petgraph", "prettyplease 0.1.25", "prost 0.11.9", @@ -8239,9 +8235,9 @@ checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" dependencies = [ "bytes", "heck 0.5.0", - "itertools 0.12.1", + "itertools 0.10.5", "log", - "multimap 0.10.0", + "multimap", "once_cell", "petgraph", "prettyplease 0.2.20", @@ -8272,7 +8268,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" dependencies = [ "anyhow", - "itertools 0.12.1", + "itertools 0.10.5", "proc-macro2", "quote", "syn 2.0.66", @@ -10596,6 +10592,7 @@ dependencies = [ "itertools 0.10.5", "lazy_static", "regex", + "serde_json", "snafu 0.8.3", "sqlparser 0.45.0 (git+https://github.com/GreptimeTeam/sqlparser-rs.git?rev=54a267ac89c09b11c0c88934690530807185d3e7)", "sqlparser_derive 0.1.1", diff --git a/Cargo.toml b/Cargo.toml index 7f9608ceb5c1..7d4929b463aa 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -120,7 +120,7 @@ etcd-client = { git = "https://github.com/MichaelScofield/etcd-client.git", rev fst = "0.4.7" futures = "0.3" futures-util = "0.3" -greptime-proto = { git = "https://github.com/GreptimeTeam/greptime-proto.git", rev = "a70a6af9c69e40f9a918936a48717343402b4393" } +greptime-proto = { git = "https://github.com/zhongzc/greptime-proto.git", branch = "zhongzc/fulltext-options" } humantime = "2.1" humantime-serde = "1.1" itertools = "0.10" diff --git a/src/api/Cargo.toml b/src/api/Cargo.toml index cd1837465bd7..8acd694a29ca 100644 --- a/src/api/Cargo.toml +++ b/src/api/Cargo.toml @@ -17,6 +17,7 @@ datatypes.workspace = true greptime-proto.workspace = true paste = "1.0" prost.workspace = true +serde_json.workspace = true snafu.workspace = true [build-dependencies] diff --git a/src/api/src/error.rs b/src/api/src/error.rs index b8bd517188ed..07e43e477299 100644 --- a/src/api/src/error.rs +++ b/src/api/src/error.rs @@ -58,13 +58,23 @@ pub enum Error { location: Location, source: datatypes::error::Error, }, + + #[snafu(display("Failed to serialize JSON"))] + SerializeJson { + #[snafu(source)] + error: serde_json::Error, + #[snafu(implicit)] + location: Location, + }, } impl ErrorExt for Error { fn status_code(&self) -> StatusCode { match self { Error::UnknownColumnDataType { .. } => StatusCode::InvalidArguments, - Error::IntoColumnDataType { .. } => StatusCode::Unexpected, + Error::IntoColumnDataType { .. } | Error::SerializeJson { .. } => { + StatusCode::Unexpected + } Error::ConvertColumnDefaultConstraint { source, .. } | Error::InvalidColumnDefaultConstraint { source, .. } => source.status_code(), } diff --git a/src/api/src/helper.rs b/src/api/src/helper.rs index 6a51aa71fe4a..d8e9c524d899 100644 --- a/src/api/src/helper.rs +++ b/src/api/src/helper.rs @@ -1843,6 +1843,7 @@ mod tests { null_mask: vec![2], datatype: ColumnDataType::Boolean as i32, datatype_extension: None, + options: None, }; assert!(is_column_type_value_eq( column1.datatype, diff --git a/src/api/src/lib.rs b/src/api/src/lib.rs index 45f3c95c99af..7670f8847c70 100644 --- a/src/api/src/lib.rs +++ b/src/api/src/lib.rs @@ -12,6 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. +#![feature(let_chains)] + pub mod error; pub mod helper; diff --git a/src/api/src/v1/column_def.rs b/src/api/src/v1/column_def.rs index 4a077d3b5451..b4d3425215c8 100644 --- a/src/api/src/v1/column_def.rs +++ b/src/api/src/v1/column_def.rs @@ -14,13 +14,19 @@ use std::collections::HashMap; -use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, COMMENT_KEY}; +use datatypes::schema::{ + ColumnDefaultConstraint, ColumnSchema, FulltextOptions, COMMENT_KEY, FULLTEXT_KEY, +}; use snafu::ResultExt; use crate::error::{self, Result}; use crate::helper::ColumnDataTypeWrapper; -use crate::v1::ColumnDef; +use crate::v1::{ColumnDef, ColumnOptions, SemanticType}; +/// Key used to store fulltext options in gRPC column options. +const FULLTEXT_GRPC_KEY: &str = "fulltext"; + +/// Tries to construct a `ColumnSchema` from the given `ColumnDef`. pub fn try_as_column_schema(column_def: &ColumnDef) -> Result { let data_type = ColumnDataTypeWrapper::try_new( column_def.data_type, @@ -43,13 +49,147 @@ pub fn try_as_column_schema(column_def: &ColumnDef) -> Result { if !column_def.comment.is_empty() { metadata.insert(COMMENT_KEY.to_string(), column_def.comment.clone()); } + if let Some(options) = column_def.options.as_ref() + && let Some(fulltext) = options.options.get(FULLTEXT_GRPC_KEY) + { + metadata.insert(FULLTEXT_KEY.to_string(), fulltext.to_string()); + } + + ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable) + .with_metadata(metadata) + .with_time_index(column_def.semantic_type() == SemanticType::Timestamp) + .with_default_constraint(constraint) + .context(error::InvalidColumnDefaultConstraintSnafu { + column: &column_def.name, + }) +} + +/// Constructs a `ColumnOptions` from the given `ColumnSchema`. +pub fn options_from_column_schema(column_schema: &ColumnSchema) -> Option { + let mut options = ColumnOptions::default(); + if let Some(fulltext) = column_schema.metadata().get(FULLTEXT_KEY) { + options + .options + .insert(FULLTEXT_GRPC_KEY.to_string(), fulltext.to_string()); + } + + (!options.options.is_empty()).then_some(options) +} + +/// Checks if the `ColumnOptions` contains fulltext options. +pub fn contains_fulltext(options: &Option) -> bool { + options + .as_ref() + .map_or(false, |o| o.options.contains_key(FULLTEXT_GRPC_KEY)) +} + +/// Tries to construct a `ColumnOptions` from the given `FulltextOptions`. +pub fn options_from_fulltext(fulltext: &FulltextOptions) -> Result> { + let mut options = ColumnOptions::default(); + + let v = serde_json::to_string(fulltext).context(error::SerializeJsonSnafu)?; + options.options.insert(FULLTEXT_GRPC_KEY.to_string(), v); + + Ok((!options.options.is_empty()).then_some(options)) +} + +#[cfg(test)] +mod tests { + + use datatypes::data_type::ConcreteDataType; + use datatypes::schema::FulltextAnalyzer; + + use super::*; + use crate::v1::ColumnDataType; + + #[test] + fn test_try_as_column_schema() { + let column_def = ColumnDef { + name: "test".to_string(), + data_type: ColumnDataType::String as i32, + is_nullable: true, + default_constraint: ColumnDefaultConstraint::Value("test_default".into()) + .try_into() + .unwrap(), + semantic_type: SemanticType::Field as i32, + comment: "test_comment".to_string(), + datatype_extension: None, + options: Some(ColumnOptions { + options: HashMap::from([( + FULLTEXT_GRPC_KEY.to_string(), + "{\"enable\":true}".to_string(), + )]), + }), + }; + + let schema = try_as_column_schema(&column_def).unwrap(); + assert_eq!(schema.name, "test"); + assert_eq!(schema.data_type, ConcreteDataType::string_datatype()); + assert!(!schema.is_time_index()); + assert!(schema.is_nullable()); + assert_eq!( + schema.default_constraint().unwrap(), + &ColumnDefaultConstraint::Value("test_default".into()) + ); + assert_eq!(schema.metadata().get(COMMENT_KEY).unwrap(), "test_comment"); + assert_eq!( + schema.fulltext_options().unwrap().unwrap(), + FulltextOptions { + enable: true, + ..Default::default() + } + ); + } + + #[test] + fn test_options_from_column_schema() { + let schema = ColumnSchema::new("test", ConcreteDataType::string_datatype(), true); + let options = options_from_column_schema(&schema); + assert!(options.is_none()); + + let schema = ColumnSchema::new("test", ConcreteDataType::string_datatype(), true) + .with_fulltext_options(FulltextOptions { + enable: true, + analyzer: FulltextAnalyzer::English, + case_sensitive: false, + }) + .unwrap(); + let options = options_from_column_schema(&schema).unwrap(); + assert_eq!( + options.options.get(FULLTEXT_GRPC_KEY).unwrap(), + "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false}" + ); + } + + #[test] + fn test_options_with_fulltext() { + let fulltext = FulltextOptions { + enable: true, + analyzer: FulltextAnalyzer::English, + case_sensitive: false, + }; + let options = options_from_fulltext(&fulltext).unwrap().unwrap(); + assert_eq!( + options.options.get(FULLTEXT_GRPC_KEY).unwrap(), + "{\"enable\":true,\"analyzer\":\"English\",\"case-sensitive\":false}" + ); + } - Ok( - ColumnSchema::new(&column_def.name, data_type.into(), column_def.is_nullable) - .with_default_constraint(constraint) - .context(error::InvalidColumnDefaultConstraintSnafu { - column: &column_def.name, - })? - .with_metadata(metadata), - ) + #[test] + fn test_contains_fulltext() { + let options = ColumnOptions { + options: HashMap::from([( + FULLTEXT_GRPC_KEY.to_string(), + "{\"enable\":true}".to_string(), + )]), + }; + assert!(contains_fulltext(&Some(options))); + + let options = ColumnOptions { + options: HashMap::new(), + }; + assert!(!contains_fulltext(&Some(options))); + + assert!(!contains_fulltext(&None)); + } } diff --git a/src/common/grpc-expr/Cargo.toml b/src/common/grpc-expr/Cargo.toml index ce98a6396da2..246a999a8499 100644 --- a/src/common/grpc-expr/Cargo.toml +++ b/src/common/grpc-expr/Cargo.toml @@ -16,6 +16,7 @@ common-macro.workspace = true common-query.workspace = true common-time.workspace = true datatypes.workspace = true +prost.workspace = true snafu.workspace = true table.workspace = true diff --git a/src/common/grpc-expr/src/error.rs b/src/common/grpc-expr/src/error.rs index 378473f22eaa..2f27c08bbe41 100644 --- a/src/common/grpc-expr/src/error.rs +++ b/src/common/grpc-expr/src/error.rs @@ -14,6 +14,7 @@ use std::any::Any; +use api::v1::ColumnDataType; use common_error::ext::ErrorExt; use common_error::status_code::StatusCode; use common_macro::stack_trace_debug; @@ -104,6 +105,25 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Unknown proto column datatype: {}", datatype))] + UnknownColumnDataType { + datatype: i32, + #[snafu(implicit)] + location: Location, + #[snafu(source)] + error: prost::DecodeError, + }, + + #[snafu(display( + "Fulltext index only supports string type, column: {column_name}, unexpected type: {column_type:?}" + ))] + InvalidFulltextColumnType { + column_name: String, + column_type: ColumnDataType, + #[snafu(implicit)] + location: Location, + }, } pub type Result = std::result::Result; @@ -124,6 +144,10 @@ impl ErrorExt for Error { Error::UnexpectedValuesLength { .. } | Error::UnknownLocationType { .. } => { StatusCode::InvalidArguments } + + Error::UnknownColumnDataType { .. } | Error::InvalidFulltextColumnType { .. } => { + StatusCode::InvalidArguments + } } } diff --git a/src/common/grpc-expr/src/insert.rs b/src/common/grpc-expr/src/insert.rs index d8d6ae7736d2..6170c5300858 100644 --- a/src/common/grpc-expr/src/insert.rs +++ b/src/common/grpc-expr/src/insert.rs @@ -474,6 +474,7 @@ mod tests { scale: 10, })), }), + options: None, }; ( diff --git a/src/common/grpc-expr/src/util.rs b/src/common/grpc-expr/src/util.rs index 3d6c65df9004..9cbc4ad8cd15 100644 --- a/src/common/grpc-expr/src/util.rs +++ b/src/common/grpc-expr/src/util.rs @@ -14,24 +14,26 @@ use std::collections::HashSet; +use api::v1::column_def::contains_fulltext; use api::v1::{ - AddColumn, AddColumns, Column, ColumnDataTypeExtension, ColumnDef, ColumnSchema, - CreateTableExpr, SemanticType, + AddColumn, AddColumns, Column, ColumnDataType, ColumnDataTypeExtension, ColumnDef, + ColumnOptions, ColumnSchema, CreateTableExpr, SemanticType, }; use datatypes::schema::Schema; -use snafu::{ensure, OptionExt}; +use snafu::{ensure, OptionExt, ResultExt}; use table::metadata::TableId; use table::table_reference::TableReference; use crate::error::{ - DuplicatedColumnNameSnafu, DuplicatedTimestampColumnSnafu, MissingTimestampColumnSnafu, Result, + DuplicatedColumnNameSnafu, DuplicatedTimestampColumnSnafu, InvalidFulltextColumnTypeSnafu, + MissingTimestampColumnSnafu, Result, UnknownColumnDataTypeSnafu, }; - pub struct ColumnExpr<'a> { pub column_name: &'a str, pub datatype: i32, pub semantic_type: i32, pub datatype_extension: &'a Option, + pub options: &'a Option, } impl<'a> ColumnExpr<'a> { @@ -53,6 +55,7 @@ impl<'a> From<&'a Column> for ColumnExpr<'a> { datatype: column.datatype, semantic_type: column.semantic_type, datatype_extension: &column.datatype_extension, + options: &column.options, } } } @@ -64,6 +67,7 @@ impl<'a> From<&'a ColumnSchema> for ColumnExpr<'a> { datatype: schema.datatype, semantic_type: schema.semantic_type, datatype_extension: &schema.datatype_extension, + options: &schema.options, } } } @@ -99,6 +103,7 @@ pub fn build_create_table_expr( datatype, semantic_type, datatype_extension, + options, } in column_exprs { let mut is_nullable = true; @@ -119,6 +124,17 @@ pub fn build_create_table_expr( _ => {} } + let column_type = + ColumnDataType::try_from(datatype).context(UnknownColumnDataTypeSnafu { datatype })?; + + ensure!( + !contains_fulltext(options) || column_type == ColumnDataType::String, + InvalidFulltextColumnTypeSnafu { + column_name, + column_type, + } + ); + let column_def = ColumnDef { name: column_name.to_string(), data_type: datatype, @@ -127,6 +143,7 @@ pub fn build_create_table_expr( semantic_type, comment: String::new(), datatype_extension: datatype_extension.clone(), + options: options.clone(), }; column_defs.push(column_def); } @@ -168,6 +185,7 @@ pub fn extract_new_columns( semantic_type: expr.semantic_type, comment: String::new(), datatype_extension: expr.datatype_extension.clone(), + options: expr.options.clone(), }); AddColumn { column_def, diff --git a/src/common/meta/src/ddl/create_table_template.rs b/src/common/meta/src/ddl/create_table_template.rs index ee9165269d14..7da347bda3da 100644 --- a/src/common/meta/src/ddl/create_table_template.rs +++ b/src/common/meta/src/ddl/create_table_template.rs @@ -48,6 +48,7 @@ pub(crate) fn build_template(create_table_expr: &CreateTableExpr) -> Result for ColumnDef { semantic_type: semantic_type as i32, comment, datatype_extension: None, + options: None, } } } diff --git a/src/datatypes/src/schema/column_schema.rs b/src/datatypes/src/schema/column_schema.rs index 60b785b706d9..c3cd8b345314 100644 --- a/src/datatypes/src/schema/column_schema.rs +++ b/src/datatypes/src/schema/column_schema.rs @@ -32,7 +32,7 @@ pub const TIME_INDEX_KEY: &str = "greptime:time_index"; pub const COMMENT_KEY: &str = "greptime:storage:comment"; /// Key used to store default constraint in arrow field's metadata. const DEFAULT_CONSTRAINT_KEY: &str = "greptime:default_constraint"; -/// Key used to store fulltext options in column metadata. +/// Key used to store fulltext options in arrow field's metadata. pub const FULLTEXT_KEY: &str = "greptime:fulltext"; /// Schema of a column, used as an immutable struct. @@ -254,6 +254,14 @@ impl ColumnSchema { } } } + + pub fn with_fulltext_options(mut self, options: FulltextOptions) -> Result { + self.metadata.insert( + FULLTEXT_KEY.to_string(), + serde_json::to_string(&options).context(error::SerializeSnafu)?, + ); + Ok(self) + } } impl TryFrom<&Field> for ColumnSchema { @@ -312,12 +320,15 @@ impl TryFrom<&ColumnSchema> for Field { /// Fulltext options for a column. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, Default)] +#[serde(rename_all = "kebab-case")] pub struct FulltextOptions { /// Whether the fulltext index is enabled. pub enable: bool, /// The fulltext analyzer to use. + #[serde(default)] pub analyzer: FulltextAnalyzer, /// Whether the fulltext index is case-sensitive. + #[serde(default)] pub case_sensitive: bool, } @@ -329,6 +340,15 @@ pub enum FulltextAnalyzer { Chinese, } +impl fmt::Display for FulltextAnalyzer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + FulltextAnalyzer::English => write!(f, "English"), + FulltextAnalyzer::Chinese => write!(f, "Chinese"), + } + } +} + #[cfg(test)] mod tests { use std::sync::Arc; diff --git a/src/flow/src/adapter/util.rs b/src/flow/src/adapter/util.rs index 0a23a86167aa..a1d2895ba3be 100644 --- a/src/flow/src/adapter/util.rs +++ b/src/flow/src/adapter/util.rs @@ -13,6 +13,7 @@ // limitations under the License. use api::helper::ColumnDataTypeWrapper; +use api::v1::column_def::options_from_column_schema; use api::v1::{ColumnDataType, ColumnDataTypeExtension, SemanticType}; use common_error::ext::BoxedError; use datatypes::schema::ColumnSchema; @@ -53,6 +54,7 @@ pub fn column_schemas_to_proto( datatype: datatype.0 as i32, semantic_type, datatype_extension: datatype.1, + options: options_from_column_schema(schema), } }) .collect(); diff --git a/src/metric-engine/src/engine/put.rs b/src/metric-engine/src/engine/put.rs index 1768d141cb83..f5d9c331a927 100644 --- a/src/metric-engine/src/engine/put.rs +++ b/src/metric-engine/src/engine/put.rs @@ -158,6 +158,7 @@ impl MetricEngineInner { datatype: ColumnDataType::Uint32 as i32, semantic_type: SemanticType::Tag as _, datatype_extension: None, + options: None, }); // add tsid column rows.schema.push(ColumnSchema { @@ -165,6 +166,7 @@ impl MetricEngineInner { datatype: ColumnDataType::Uint64 as i32, semantic_type: SemanticType::Tag as _, datatype_extension: None, + options: None, }); // fill internal columns diff --git a/src/metric-engine/src/test_util.rs b/src/metric-engine/src/test_util.rs index 79a523bb5758..71c35b6119cc 100644 --- a/src/metric-engine/src/test_util.rs +++ b/src/metric-engine/src/test_util.rs @@ -245,12 +245,14 @@ pub fn row_schema_with_tags(tags: &[&str]) -> Vec { datatype: ColumnDataType::TimestampMillisecond as i32, semantic_type: SemanticType::Timestamp as _, datatype_extension: None, + options: None, }, PbColumnSchema { column_name: "greptime_value".to_string(), datatype: ColumnDataType::Float64 as i32, semantic_type: SemanticType::Field as _, datatype_extension: None, + options: None, }, ]; for tag in tags { @@ -259,6 +261,7 @@ pub fn row_schema_with_tags(tags: &[&str]) -> Vec { datatype: ColumnDataType::String as i32, semantic_type: SemanticType::Tag as _, datatype_extension: None, + options: None, }); } schema diff --git a/src/mito2/src/request.rs b/src/mito2/src/request.rs index 25f8a6985d42..780c85b1d1e0 100644 --- a/src/mito2/src/request.rs +++ b/src/mito2/src/request.rs @@ -22,6 +22,7 @@ use api::helper::{ is_column_type_value_eq, is_semantic_type_eq, proto_value_type, to_proto_value, ColumnDataTypeWrapper, }; +use api::v1::column_def::options_from_column_schema; use api::v1::{ColumnDataType, ColumnSchema, OpType, Rows, SemanticType, Value}; use common_telemetry::info; use datatypes::prelude::DataType; @@ -270,6 +271,7 @@ impl WriteRequest { datatype: datatype as i32, semantic_type: column.semantic_type as i32, datatype_extension: datatype_ext, + options: options_from_column_schema(&column.column_schema), }); Ok(()) diff --git a/src/mito2/src/sst/index.rs b/src/mito2/src/sst/index.rs index 5a55cbf7b626..2407a974c107 100644 --- a/src/mito2/src/sst/index.rs +++ b/src/mito2/src/sst/index.rs @@ -289,7 +289,7 @@ mod tests { use api::v1::SemanticType; use datatypes::data_type::ConcreteDataType; - use datatypes::schema::{ColumnSchema, FulltextOptions, FULLTEXT_KEY}; + use datatypes::schema::{ColumnSchema, FulltextOptions}; use object_store::services::Memory; use object_store::ObjectStore; use puffin_manager::PuffinManagerFactory; @@ -340,17 +340,13 @@ mod tests { } if with_fulltext { - let opts = serde_json::to_string(&FulltextOptions { - enable: true, - ..Default::default() - }) - .unwrap(); - - let mut column_schema = - ColumnSchema::new("text", ConcreteDataType::string_datatype(), true); - column_schema - .mut_metadata() - .insert(FULLTEXT_KEY.to_string(), opts); + let column_schema = + ColumnSchema::new("text", ConcreteDataType::string_datatype(), true) + .with_fulltext_options(FulltextOptions { + enable: true, + ..Default::default() + }) + .unwrap(); let column = ColumnMetadata { column_schema, diff --git a/src/mito2/src/test_util.rs b/src/mito2/src/test_util.rs index 0bac00714abc..08c43dd31c84 100644 --- a/src/mito2/src/test_util.rs +++ b/src/mito2/src/test_util.rs @@ -29,6 +29,7 @@ use std::sync::Arc; use api::greptime_proto::v1; use api::helper::ColumnDataTypeWrapper; +use api::v1::column_def::options_from_column_schema; use api::v1::value::ValueData; use api::v1::{OpType, Row, Rows, SemanticType}; use common_base::readable_size::ReadableSize; @@ -933,6 +934,7 @@ pub(crate) fn column_metadata_to_column_schema(metadata: &ColumnMetadata) -> api datatype: datatype as i32, semantic_type: metadata.semantic_type as i32, datatype_extension, + options: options_from_column_schema(&metadata.column_schema), } } diff --git a/src/operator/src/expr_factory.rs b/src/operator/src/expr_factory.rs index 09039853a488..db86be7e7a31 100644 --- a/src/operator/src/expr_factory.rs +++ b/src/operator/src/expr_factory.rs @@ -16,8 +16,9 @@ use std::collections::{HashMap, HashSet}; use api::helper::ColumnDataTypeWrapper; use api::v1::alter_expr::Kind; +use api::v1::column_def::options_from_column_schema; use api::v1::{ - AddColumn, AddColumns, AlterExpr, ChangeColumnType, ChangeColumnTypes, Column, ColumnDataType, + AddColumn, AddColumns, AlterExpr, ChangeColumnType, ChangeColumnTypes, ColumnDataType, ColumnDataTypeExtension, CreateFlowExpr, CreateTableExpr, CreateViewExpr, DropColumn, DropColumns, ExpireAfter, RenameTable, SemanticType, TableName, }; @@ -34,13 +35,13 @@ use query::sql::{ use session::context::QueryContextRef; use session::table_name::table_idents_to_full_name; use snafu::{ensure, OptionExt, ResultExt}; -use sql::ast::{ColumnDef, ColumnOption, TableConstraint}; +use sql::ast::{ColumnOption, TableConstraint}; use sql::statements::alter::{AlterTable, AlterTableOperation}; use sql::statements::create::{ - CreateExternalTable, CreateFlow, CreateTable, CreateView, TIME_INDEX, + Column as SqlColumn, CreateExternalTable, CreateFlow, CreateTable, CreateView, TIME_INDEX, }; use sql::statements::{ - column_def_to_schema, sql_column_def_to_grpc_column_def, sql_data_type_to_concrete_data_type, + column_to_schema, sql_column_def_to_grpc_column_def, sql_data_type_to_concrete_data_type, }; use sql::util::extract_tables_from_query; use table::requests::{TableOptions, FILE_TABLE_META_KEY}; @@ -57,25 +58,6 @@ use crate::error::{ pub struct CreateExprFactory; impl CreateExprFactory { - pub fn create_table_expr_by_columns( - &self, - table_name: &TableReference<'_>, - columns: &[Column], - engine: &str, - ) -> Result { - let column_exprs = ColumnExpr::from_columns(columns); - let create_expr = common_grpc_expr::util::build_create_table_expr( - None, - table_name, - column_exprs, - engine, - "Created on insertion", - ) - .context(BuildCreateExprOnInsertionSnafu)?; - - Ok(create_expr) - } - pub fn create_table_expr_by_column_schemas( &self, table_name: &TableReference<'_>, @@ -290,13 +272,13 @@ pub fn validate_create_expr(create: &CreateTableExpr) -> Result<()> { } fn find_primary_keys( - columns: &[ColumnDef], + columns: &[SqlColumn], constraints: &[TableConstraint], ) -> Result> { let columns_pk = columns .iter() .filter_map(|x| { - if x.options.iter().any(|o| { + if x.options().iter().any(|o| { matches!( o.option, ColumnOption::Unique { @@ -305,7 +287,7 @@ fn find_primary_keys( } ) }) { - Some(x.name.value.clone()) + Some(x.name().value.clone()) } else { None } @@ -372,7 +354,7 @@ pub fn find_time_index(constraints: &[TableConstraint]) -> Result { } fn columns_to_expr( - column_defs: &[ColumnDef], + column_defs: &[SqlColumn], time_index: &str, primary_keys: &[String], timezone: Option<&Timezone>, @@ -382,15 +364,14 @@ fn columns_to_expr( } fn columns_to_column_schemas( - column_defs: &[ColumnDef], + columns: &[SqlColumn], time_index: &str, timezone: Option<&Timezone>, ) -> Result> { - column_defs + columns .iter() .map(|c| { - column_def_to_schema(c, c.name.to_string() == time_index, timezone) - .context(ParseSqlSnafu) + column_to_schema(c, c.name().to_string() == time_index, timezone).context(ParseSqlSnafu) }) .collect::>>() } @@ -442,6 +423,7 @@ pub fn column_schemas_to_defs( semantic_type, comment, datatype_extension: datatype.1, + options: options_from_column_schema(schema), }) }) .collect() diff --git a/src/operator/src/insert.rs b/src/operator/src/insert.rs index 07d01e0d4029..1ea03f2f1ec5 100644 --- a/src/operator/src/insert.rs +++ b/src/operator/src/insert.rs @@ -563,12 +563,14 @@ impl Inserter { datatype: ColumnDataType::TimestampMillisecond as _, semantic_type: SemanticType::Timestamp as _, datatype_extension: None, + options: None, }, ColumnSchema { column_name: GREPTIME_VALUE.to_string(), datatype: ColumnDataType::Float64 as _, semantic_type: SemanticType::Field as _, datatype_extension: None, + options: None, }, ]; let create_table_expr = &mut build_create_table_expr(&table_reference, &default_schema)?; diff --git a/src/operator/src/req_convert/common.rs b/src/operator/src/req_convert/common.rs index 0073e934b993..3994b32fc7f7 100644 --- a/src/operator/src/req_convert/common.rs +++ b/src/operator/src/req_convert/common.rs @@ -17,6 +17,7 @@ pub(crate) mod partitioner; use std::collections::HashMap; use api::helper::ColumnDataTypeWrapper; +use api::v1::column_def::options_from_column_schema; use api::v1::value::ValueData; use api::v1::{Column, ColumnDataType, ColumnSchema, Row, Rows, SemanticType, Value}; use common_base::BitVec; @@ -46,6 +47,7 @@ pub fn columns_to_rows(columns: Vec, row_count: u32) -> Result { datatype: column.datatype, semantic_type: column.semantic_type, datatype_extension: column.datatype_extension.clone(), + options: column.options.clone(), }; schema.push(column_schema); @@ -196,11 +198,20 @@ pub fn column_schema( .context(ColumnDataTypeSnafu)? .to_parts(); + let column_schema = table_info + .meta + .schema + .column_schema_by_name(column_name) + .context(ColumnNotFoundSnafu { + msg: format!("unable to find column {column_name} in table schema"), + })?; + Ok(ColumnSchema { column_name: column_name.clone(), datatype: datatype as i32, semantic_type: semantic_type(table_info, column_name)?.into(), datatype_extension, + options: options_from_column_schema(column_schema), }) }) .collect::>>() diff --git a/src/operator/src/req_convert/insert/stmt_to_region.rs b/src/operator/src/req_convert/insert/stmt_to_region.rs index 5317f92dd7c8..71f107020d62 100644 --- a/src/operator/src/req_convert/insert/stmt_to_region.rs +++ b/src/operator/src/req_convert/insert/stmt_to_region.rs @@ -13,6 +13,7 @@ // limitations under the License. use api::helper::{value_to_grpc_value, ColumnDataTypeWrapper}; +use api::v1::column_def::options_from_column_schema; use api::v1::region::InsertRequests as RegionInsertRequests; use api::v1::{ColumnSchema as GrpcColumnSchema, Row, Rows, Value as GrpcValue}; use catalog::CatalogManager; @@ -116,6 +117,7 @@ impl<'a> StatementToRegion<'a> { datatype: datatype.into(), semantic_type: semantic_type.into(), datatype_extension, + options: options_from_column_schema(column_schema), }; schema.push(grpc_column_schema); diff --git a/src/pipeline/src/etl/transform/transformer/greptime/coerce.rs b/src/pipeline/src/etl/transform/transformer/greptime/coerce.rs index 49e008e438ea..61b691ff9406 100644 --- a/src/pipeline/src/etl/transform/transformer/greptime/coerce.rs +++ b/src/pipeline/src/etl/transform/transformer/greptime/coerce.rs @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use api::v1::column_def::options_from_fulltext; +use api::v1::ColumnOptions; +use datatypes::schema::FulltextOptions; use greptime_proto::v1::value::ValueData; use greptime_proto::v1::{ColumnDataType, ColumnSchema, SemanticType}; @@ -71,6 +74,7 @@ pub(crate) fn coerce_columns(transform: &Transform) -> Result, datatype, semantic_type, datatype_extension: None, + options: coerce_options(transform)?, }; columns.push(column); } @@ -82,8 +86,19 @@ fn coerce_semantic_type(transform: &Transform) -> SemanticType { match transform.index { Some(Index::Tag) => SemanticType::Tag, Some(Index::Timestamp) => SemanticType::Timestamp, - Some(Index::Fulltext) => unimplemented!("Fulltext"), - None => SemanticType::Field, + Some(Index::Fulltext) | None => SemanticType::Field, + } +} + +fn coerce_options(transform: &Transform) -> Result, String> { + if let Some(Index::Fulltext) = transform.index { + options_from_fulltext(&FulltextOptions { + enable: true, + ..Default::default() + }) + .map_err(|e| e.to_string()) + } else { + Ok(None) } } diff --git a/src/pipeline/src/manager/table.rs b/src/pipeline/src/manager/table.rs index d3197123cce5..58df2bcabb9b 100644 --- a/src/pipeline/src/manager/table.rs +++ b/src/pipeline/src/manager/table.rs @@ -113,6 +113,7 @@ impl PipelineTable { semantic_type: SemanticType::Tag as i32, comment: "".to_string(), datatype_extension: None, + options: None, }, ColumnDef { name: PIPELINE_TABLE_PIPELINE_SCHEMA_COLUMN_NAME.to_string(), @@ -122,6 +123,7 @@ impl PipelineTable { semantic_type: SemanticType::Tag as i32, comment: "".to_string(), datatype_extension: None, + options: None, }, ColumnDef { name: PIPELINE_TABLE_PIPELINE_CONTENT_TYPE_COLUMN_NAME.to_string(), @@ -131,6 +133,7 @@ impl PipelineTable { semantic_type: SemanticType::Tag as i32, comment: "".to_string(), datatype_extension: None, + options: None, }, ColumnDef { name: PIPELINE_TABLE_PIPELINE_CONTENT_COLUMN_NAME.to_string(), @@ -140,6 +143,7 @@ impl PipelineTable { semantic_type: SemanticType::Field as i32, comment: "".to_string(), datatype_extension: None, + options: None, }, ColumnDef { name: PIPELINE_TABLE_CREATED_AT_COLUMN_NAME.to_string(), @@ -149,6 +153,7 @@ impl PipelineTable { semantic_type: SemanticType::Timestamp as i32, comment: "".to_string(), datatype_extension: None, + options: None, }, ], ) diff --git a/src/pipeline/tests/gsub.rs b/src/pipeline/tests/gsub.rs index f1209a6f8830..0c527b87ce70 100644 --- a/src/pipeline/tests/gsub.rs +++ b/src/pipeline/tests/gsub.rs @@ -54,6 +54,7 @@ transform: datatype: ColumnDataType::TimestampMillisecond.into(), semantic_type: SemanticType::Timestamp.into(), datatype_extension: None, + options: None, }]; assert_eq!(output.schema, expected_schema); diff --git a/src/pipeline/tests/join.rs b/src/pipeline/tests/join.rs index b7c8c627d247..302da13c79fd 100644 --- a/src/pipeline/tests/join.rs +++ b/src/pipeline/tests/join.rs @@ -37,12 +37,14 @@ lazy_static! { datatype: ColumnDataType::String.into(), semantic_type: SemanticType::Field.into(), datatype_extension: None, + options: None, }, ColumnSchema { column_name: "greptime_timestamp".to_string(), datatype: ColumnDataType::TimestampNanosecond.into(), semantic_type: SemanticType::Timestamp.into(), datatype_extension: None, + options: None, }, ]; } diff --git a/src/pipeline/tests/on_failure.rs b/src/pipeline/tests/on_failure.rs index c0d69f4415ad..199f8a1606db 100644 --- a/src/pipeline/tests/on_failure.rs +++ b/src/pipeline/tests/on_failure.rs @@ -45,12 +45,14 @@ transform: datatype: ColumnDataType::Uint8.into(), semantic_type: SemanticType::Field.into(), datatype_extension: None, + options: None, }, ColumnSchema { column_name: "greptime_timestamp".to_string(), datatype: ColumnDataType::TimestampNanosecond.into(), semantic_type: SemanticType::Timestamp.into(), datatype_extension: None, + options: None, }, ]; @@ -88,12 +90,14 @@ transform: datatype: ColumnDataType::Uint8.into(), semantic_type: SemanticType::Field.into(), datatype_extension: None, + options: None, }, ColumnSchema { column_name: "greptime_timestamp".to_string(), datatype: ColumnDataType::TimestampNanosecond.into(), semantic_type: SemanticType::Timestamp.into(), datatype_extension: None, + options: None, }, ]; @@ -126,12 +130,14 @@ transform: datatype: ColumnDataType::Uint8.into(), semantic_type: SemanticType::Field.into(), datatype_extension: None, + options: None, }, ColumnSchema { column_name: "greptime_timestamp".to_string(), datatype: ColumnDataType::TimestampNanosecond.into(), semantic_type: SemanticType::Timestamp.into(), datatype_extension: None, + options: None, }, ]; @@ -175,18 +181,21 @@ transform: datatype: ColumnDataType::Uint8.into(), semantic_type: SemanticType::Field.into(), datatype_extension: None, + options: None, }, ColumnSchema { column_name: "spec_version".to_string(), datatype: ColumnDataType::Uint16.into(), semantic_type: SemanticType::Field.into(), datatype_extension: None, + options: None, }, ColumnSchema { column_name: "greptime_timestamp".to_string(), datatype: ColumnDataType::TimestampNanosecond.into(), semantic_type: SemanticType::Timestamp.into(), datatype_extension: None, + options: None, }, ]; diff --git a/src/query/src/error.rs b/src/query/src/error.rs index 74316a66a767..f8824860351a 100644 --- a/src/query/src/error.rs +++ b/src/query/src/error.rs @@ -314,6 +314,13 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Failed to get fulltext options"))] + GetFulltextOptions { + source: datatypes::error::Error, + #[snafu(implicit)] + location: Location, + }, } impl ErrorExt for Error { @@ -364,6 +371,7 @@ impl ErrorExt for Error { MissingTableMutationHandler { .. } => StatusCode::Unexpected, GetRegionMetadata { .. } => StatusCode::Internal, TableReadOnly { .. } => StatusCode::Unsupported, + GetFulltextOptions { source, .. } => source.status_code(), } } diff --git a/src/query/src/sql/show_create_table.rs b/src/query/src/sql/show_create_table.rs index 74150465f1bc..2c560bd36013 100644 --- a/src/query/src/sql/show_create_table.rs +++ b/src/query/src/sql/show_create_table.rs @@ -14,6 +14,8 @@ //! Implementation of `SHOW CREATE TABLE` statement. +use std::collections::HashMap; + use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, SchemaRef, COMMENT_KEY}; use humantime::format_duration; use snafu::ResultExt; @@ -22,14 +24,17 @@ use sql::ast::{ }; use sql::dialect::GreptimeDbDialect; use sql::parser::ParserContext; -use sql::statements::create::{CreateTable, TIME_INDEX}; +use sql::statements::create::{Column, ColumnExtensions, CreateTable, TIME_INDEX}; use sql::statements::{self, OptionMap}; +use sql::{COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE}; use sqlparser::ast::KeyOrIndexDisplay; use store_api::metric_engine_consts::{is_metric_engine, is_metric_engine_internal_column}; use table::metadata::{TableInfoRef, TableMeta}; use table::requests::{FILE_TABLE_META_KEY, TTL_KEY, WRITE_BUFFER_SIZE_KEY}; -use crate::error::{ConvertSqlTypeSnafu, ConvertSqlValueSnafu, Result, SqlSnafu}; +use crate::error::{ + ConvertSqlTypeSnafu, ConvertSqlValueSnafu, GetFulltextOptionsSnafu, Result, SqlSnafu, +}; fn create_sql_options(table_meta: &TableMeta) -> OptionMap { let table_opts = &table_meta.options; @@ -58,9 +63,10 @@ fn column_option_def(option: ColumnOption) -> ColumnOptionDef { ColumnOptionDef { name: None, option } } -fn create_column_def(column_schema: &ColumnSchema, quote_style: char) -> Result { +fn create_column(column_schema: &ColumnSchema, quote_style: char) -> Result { let name = &column_schema.name; let mut options = Vec::with_capacity(2); + let mut extensions = ColumnExtensions::default(); if column_schema.is_nullable() { options.push(column_option_def(ColumnOption::Null)); @@ -86,14 +92,35 @@ fn create_column_def(column_schema: &ColumnSchema, quote_style: char) -> Result< options.push(column_option_def(ColumnOption::Comment(c.to_string()))); } - Ok(ColumnDef { - name: Ident::with_quote(quote_style, name), - data_type: statements::concrete_data_type_to_sql_data_type(&column_schema.data_type) - .with_context(|_| ConvertSqlTypeSnafu { - datatype: column_schema.data_type.clone(), - })?, - collation: None, - options, + if let Some(opt) = column_schema + .fulltext_options() + .context(GetFulltextOptionsSnafu)? + && opt.enable + { + let map = HashMap::from([ + ( + COLUMN_FULLTEXT_OPT_KEY_ANALYZER.to_string(), + opt.analyzer.to_string(), + ), + ( + COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE.to_string(), + opt.case_sensitive.to_string(), + ), + ]); + extensions.fulltext_options = Some(map.into()); + } + + Ok(Column { + column_def: ColumnDef { + name: Ident::with_quote(quote_style, name), + data_type: statements::concrete_data_type_to_sql_data_type(&column_schema.data_type) + .with_context(|_| ConvertSqlTypeSnafu { + datatype: column_schema.data_type.clone(), + })?, + collation: None, + options, + }, + extensions, }) } @@ -154,7 +181,7 @@ pub fn create_table_stmt(table_info: &TableInfoRef, quote_style: char) -> Result if is_metric_engine && is_metric_engine_internal_column(&c.name) { None } else { - Some(create_column_def(c, quote_style)) + Some(create_column(c, quote_style)) } }) .collect::>>()?; @@ -179,7 +206,7 @@ mod tests { use common_time::timestamp::TimeUnit; use datatypes::prelude::ConcreteDataType; - use datatypes::schema::{Schema, SchemaRef}; + use datatypes::schema::{FulltextOptions, Schema, SchemaRef}; use table::metadata::*; use table::requests::{ TableOptions, FILE_TABLE_FORMAT_KEY, FILE_TABLE_LOCATION_KEY, FILE_TABLE_META_KEY, @@ -194,6 +221,12 @@ mod tests { ColumnSchema::new("host", ConcreteDataType::string_datatype(), true), ColumnSchema::new("cpu", ConcreteDataType::float64_datatype(), true), ColumnSchema::new("disk", ConcreteDataType::float32_datatype(), true), + ColumnSchema::new("msg", ConcreteDataType::string_datatype(), true) + .with_fulltext_options(FulltextOptions { + enable: true, + ..Default::default() + }) + .unwrap(), ColumnSchema::new( "ts", ConcreteDataType::timestamp_datatype(TimeUnit::Millisecond), @@ -205,6 +238,7 @@ mod tests { .unwrap() .with_time_index(true), ]; + let table_schema = SchemaRef::new(Schema::new(schema)); let table_name = "system_metrics"; let schema_name = "public".to_string(); @@ -247,6 +281,7 @@ CREATE TABLE IF NOT EXISTS "system_metrics" ( "host" STRING NULL, "cpu" DOUBLE NULL, "disk" FLOAT NULL, + "msg" STRING NULL FULLTEXT WITH(analyzer = 'English', case_sensitive = 'false'), "ts" TIMESTAMP(3) NOT NULL DEFAULT current_timestamp(), TIME INDEX ("ts"), PRIMARY KEY ("id", "host") diff --git a/src/script/src/table.rs b/src/script/src/table.rs index 5eacf1ff7ce9..bbc04e6f64a7 100644 --- a/src/script/src/table.rs +++ b/src/script/src/table.rs @@ -369,6 +369,7 @@ pub fn build_scripts_schema() -> (String, Vec, Vec) { semantic_type: c.semantic_type, comment: "".to_string(), datatype_extension: None, + options: c.options, }) .collect(); diff --git a/src/servers/src/prom_row_builder.rs b/src/servers/src/prom_row_builder.rs index d52fa547f8f3..97f2c47712d4 100644 --- a/src/servers/src/prom_row_builder.rs +++ b/src/servers/src/prom_row_builder.rs @@ -97,6 +97,7 @@ impl TableBuilder { datatype: ColumnDataType::TimestampMillisecond as i32, semantic_type: SemanticType::Timestamp as i32, datatype_extension: None, + options: None, }); schema.push(ColumnSchema { @@ -104,6 +105,7 @@ impl TableBuilder { datatype: ColumnDataType::Float64 as i32, semantic_type: SemanticType::Field as i32, datatype_extension: None, + options: None, }); Self { @@ -159,6 +161,7 @@ impl TableBuilder { datatype: ColumnDataType::String as i32, semantic_type: SemanticType::Tag as i32, datatype_extension: None, + options: None, }); row.push(Value { value_data: tag_value, diff --git a/src/sql/Cargo.toml b/src/sql/Cargo.toml index c33688d8ed48..a9ed77e8ea89 100644 --- a/src/sql/Cargo.toml +++ b/src/sql/Cargo.toml @@ -28,6 +28,7 @@ iso8601 = "0.6.1" itertools.workspace = true lazy_static.workspace = true regex.workspace = true +serde_json.workspace = true snafu.workspace = true sqlparser.workspace = true sqlparser_derive = "0.1" diff --git a/src/sql/src/error.rs b/src/sql/src/error.rs index ed88a8826a59..18453f8b3099 100644 --- a/src/sql/src/error.rs +++ b/src/sql/src/error.rs @@ -252,6 +252,20 @@ pub enum Error { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Invalid fulltext option: {}", msg))] + FulltextInvalidOption { + msg: String, + #[snafu(implicit)] + location: Location, + }, + + #[snafu(display("Failed to set fulltext option"))] + SetFulltextOption { + source: datatypes::error::Error, + #[snafu(implicit)] + location: Location, + }, } impl ErrorExt for Error { @@ -285,12 +299,15 @@ impl ErrorExt for Error { | ConvertToLogicalExpression { .. } | Simplification { .. } | InvalidInterval { .. } - | PermissionDenied { .. } => StatusCode::InvalidArguments, + | PermissionDenied { .. } + | FulltextInvalidOption { .. } => StatusCode::InvalidArguments, SerializeColumnDefaultConstraint { source, .. } => source.status_code(), ConvertToGrpcDataType { source, .. } => source.status_code(), ConvertToDfStatement { .. } => StatusCode::Internal, ConvertSqlValue { .. } | ConvertValue { .. } => StatusCode::Unsupported, + + SetFulltextOption { .. } => StatusCode::Unexpected, } } diff --git a/src/sql/src/lib.rs b/src/sql/src/lib.rs index eda9e7b806b3..283ebb50ec32 100644 --- a/src/sql/src/lib.rs +++ b/src/sql/src/lib.rs @@ -25,6 +25,8 @@ pub mod parsers; pub mod statements; pub mod util; -pub use parsers::create_parser::{ENGINE, MAXVALUE}; +pub use parsers::create_parser::{ + COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, ENGINE, MAXVALUE, +}; pub use parsers::tql_parser::TQL; pub use statements::create::TIME_INDEX; diff --git a/src/sql/src/parser.rs b/src/sql/src/parser.rs index 01aa0ef39336..6bdc8f8d23e3 100644 --- a/src/sql/src/parser.rs +++ b/src/sql/src/parser.rs @@ -314,7 +314,7 @@ mod tests { let ts_col = columns.first().unwrap(); assert_eq!( expected_type, - sql_data_type_to_concrete_data_type(&ts_col.data_type).unwrap() + sql_data_type_to_concrete_data_type(ts_col.data_type()).unwrap() ); } _ => unreachable!(), diff --git a/src/sql/src/parsers/create_parser.rs b/src/sql/src/parsers/create_parser.rs index 6cc2ff97ce53..da315123a26a 100644 --- a/src/sql/src/parsers/create_parser.rs +++ b/src/sql/src/parsers/create_parser.rs @@ -17,13 +17,14 @@ use std::collections::HashMap; use common_catalog::consts::default_engine; use datafusion_common::ScalarValue; use datatypes::arrow::datatypes::{DataType as ArrowDataType, IntervalUnit}; +use datatypes::data_type::ConcreteDataType; use itertools::Itertools; use snafu::{ensure, OptionExt, ResultExt}; use sqlparser::ast::{ColumnOption, ColumnOptionDef, DataType, Expr, KeyOrIndexDisplay}; use sqlparser::dialect::keywords::Keyword; use sqlparser::keywords::ALL_KEYWORDS; use sqlparser::parser::IsOptional::Mandatory; -use sqlparser::parser::{Parser, ParserError}; +use sqlparser::parser::Parser; use sqlparser::tokenizer::{Token, TokenWithLocation, Word}; use table::requests::validate_table_option; @@ -31,16 +32,18 @@ use super::utils; use crate::ast::{ColumnDef, Ident, TableConstraint}; use crate::error::{ self, InvalidColumnOptionSnafu, InvalidDatabaseOptionSnafu, InvalidIntervalSnafu, - InvalidTableOptionSnafu, InvalidTimeIndexSnafu, MissingTimeIndexSnafu, Result, SyntaxSnafu, - UnexpectedSnafu, UnsupportedSnafu, + InvalidSqlSnafu, InvalidTableOptionSnafu, InvalidTimeIndexSnafu, MissingTimeIndexSnafu, Result, + SyntaxSnafu, UnexpectedSnafu, UnsupportedSnafu, }; use crate::parser::{ParserContext, FLOW}; use crate::statements::create::{ - CreateDatabase, CreateExternalTable, CreateFlow, CreateTable, CreateTableLike, CreateView, - Partitions, TIME_INDEX, + Column, ColumnExtensions, CreateDatabase, CreateExternalTable, CreateFlow, CreateTable, + CreateTableLike, CreateView, Partitions, TIME_INDEX, }; use crate::statements::statement::Statement; -use crate::statements::{get_data_type_by_alias_name, OptionMap}; +use crate::statements::{ + get_data_type_by_alias_name, sql_data_type_to_concrete_data_type, OptionMap, +}; use crate::util::parse_option_string; pub const ENGINE: &str = "ENGINE"; @@ -55,6 +58,17 @@ fn validate_database_option(key: &str) -> bool { [DB_OPT_KEY_TTL].contains(&key) } +pub const COLUMN_FULLTEXT_OPT_KEY_ANALYZER: &str = "analyzer"; +pub const COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE: &str = "case_sensitive"; + +fn validate_column_fulltext_option(key: &str) -> bool { + [ + COLUMN_FULLTEXT_OPT_KEY_ANALYZER, + COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE, + ] + .contains(&key) +} + /// Parses create [table] statement impl<'a> ParserContext<'a> { pub(crate) fn parse_create(&mut self) -> Result { @@ -409,7 +423,7 @@ impl<'a> ParserContext<'a> { Ok(values) } - fn parse_columns(&mut self) -> Result<(Vec, Vec)> { + fn parse_columns(&mut self) -> Result<(Vec, Vec)> { let mut columns = vec![]; let mut constraints = vec![]; if !self.parser.consume_token(&Token::LParen) || self.parser.consume_token(&Token::RParen) { @@ -444,13 +458,13 @@ impl<'a> ParserContext<'a> { fn parse_column( &mut self, - columns: &mut Vec, + columns: &mut Vec, constraints: &mut Vec, ) -> Result<()> { - let mut column = self.parse_column_def().context(SyntaxSnafu)?; + let mut column = self.parse_column_def()?; let mut time_index_opt_idx = None; - for (index, opt) in column.options.iter().enumerate() { + for (index, opt) in column.options().iter().enumerate() { if let ColumnOption::DialectSpecific(tokens) = &opt.option { if matches!( &tokens[..], @@ -468,7 +482,7 @@ impl<'a> ParserContext<'a> { ensure!( time_index_opt_idx.is_none(), InvalidColumnOptionSnafu { - name: column.name.to_string(), + name: column.name().to_string(), msg: "duplicated time index", } ); @@ -480,7 +494,7 @@ impl<'a> ParserContext<'a> { quote_style: None, }), columns: vec![Ident { - value: column.name.value.clone(), + value: column.name().value.clone(), quote_style: None, }], characteristics: None, @@ -496,22 +510,22 @@ impl<'a> ParserContext<'a> { if let Some(index) = time_index_opt_idx { ensure!( - !column.options.contains(&ColumnOptionDef { + !column.options().contains(&ColumnOptionDef { option: ColumnOption::Null, name: None, }), InvalidColumnOptionSnafu { - name: column.name.to_string(), + name: column.name().to_string(), msg: "time index column can't be null", } ); // The timestamp type may be an alias type, we have to retrieve the actual type. - let data_type = get_real_timestamp_type(&column.data_type); + let data_type = get_unalias_type(column.data_type()); ensure!( matches!(data_type, DataType::Timestamp(_, _)), InvalidColumnOptionSnafu { - name: column.name.to_string(), + name: column.name().to_string(), msg: "time index column data type should be timestamp", } ); @@ -521,11 +535,11 @@ impl<'a> ParserContext<'a> { name: None, }; - if !column.options.contains(¬_null_opt) { - column.options.push(not_null_opt); + if !column.options().contains(¬_null_opt) { + column.mut_options().push(not_null_opt); } - let _ = column.options.remove(index); + let _ = column.mut_options().remove(index); } columns.push(column); @@ -533,58 +547,65 @@ impl<'a> ParserContext<'a> { Ok(()) } - pub fn parse_column_def(&mut self) -> std::result::Result { + pub fn parse_column_def(&mut self) -> Result { let parser = &mut self.parser; - let name = parser.parse_identifier(false)?; - if name.quote_style.is_none() && - // "ALL_KEYWORDS" are sorted. - ALL_KEYWORDS.binary_search(&name.value.to_uppercase().as_str()).is_ok() - { - return Err(ParserError::ParserError(format!( - "Cannot use keyword '{}' as column name. Hint: add quotes to the name.", - &name.value - ))); - } + let name = parser.parse_identifier(false).context(SyntaxSnafu)?; + ensure!( + !(name.quote_style.is_none() && + // "ALL_KEYWORDS" are sorted. + ALL_KEYWORDS.binary_search(&name.value.to_uppercase().as_str()).is_ok()), + InvalidSqlSnafu { + msg: format!( + "Cannot use keyword '{}' as column name. Hint: add quotes to the name.", + &name.value + ), + } + ); - let data_type = parser.parse_data_type()?; + let data_type = parser.parse_data_type().context(SyntaxSnafu)?; let collation = if parser.parse_keyword(Keyword::COLLATE) { - Some(parser.parse_object_name(false)?) + Some(parser.parse_object_name(false).context(SyntaxSnafu)?) } else { None }; let mut options = vec![]; + let mut extensions = ColumnExtensions::default(); loop { if parser.parse_keyword(Keyword::CONSTRAINT) { - let name = Some(parser.parse_identifier(false)?); + let name = Some(parser.parse_identifier(false).context(SyntaxSnafu)?); if let Some(option) = Self::parse_optional_column_option(parser)? { options.push(ColumnOptionDef { name, option }); } else { - return parser.expected( - "constraint details after CONSTRAINT ", - parser.peek_token(), - ); + return parser + .expected( + "constraint details after CONSTRAINT ", + parser.peek_token(), + ) + .context(SyntaxSnafu); } } else if let Some(option) = Self::parse_optional_column_option(parser)? { options.push(ColumnOptionDef { name: None, option }); - } else { + } else if !Self::parse_column_extensions(parser, &name, &data_type, &mut extensions)? { break; }; } - Ok(ColumnDef { - name: Self::canonicalize_identifier(name), - data_type, - collation, - options, + + Ok(Column { + column_def: ColumnDef { + name: Self::canonicalize_identifier(name), + data_type, + collation, + options, + }, + extensions, }) } - fn parse_optional_column_option( - parser: &mut Parser<'a>, - ) -> std::result::Result, ParserError> { + fn parse_optional_column_option(parser: &mut Parser<'a>) -> Result> { if parser.parse_keywords(&[Keyword::CHARACTER, Keyword::SET]) { Ok(Some(ColumnOption::CharacterSet( - parser.parse_object_name(false)?, + parser.parse_object_name(false).context(SyntaxSnafu)?, ))) } else if parser.parse_keywords(&[Keyword::NOT, Keyword::NULL]) { Ok(Some(ColumnOption::NotNull)) @@ -594,12 +615,14 @@ impl<'a> ParserContext<'a> { token: Token::SingleQuotedString(value, ..), .. } => Ok(Some(ColumnOption::Comment(value))), - unexpected => parser.expected("string", unexpected), + unexpected => parser.expected("string", unexpected).context(SyntaxSnafu), } } else if parser.parse_keyword(Keyword::NULL) { Ok(Some(ColumnOption::Null)) } else if parser.parse_keyword(Keyword::DEFAULT) { - Ok(Some(ColumnOption::Default(parser.parse_expr()?))) + Ok(Some(ColumnOption::Default( + parser.parse_expr().context(SyntaxSnafu)?, + ))) } else if parser.parse_keywords(&[Keyword::PRIMARY, Keyword::KEY]) { Ok(Some(ColumnOption::Unique { is_primary: true, @@ -629,6 +652,55 @@ impl<'a> ParserContext<'a> { } } + fn parse_column_extensions( + parser: &mut Parser<'a>, + column_name: &Ident, + column_type: &DataType, + column_extensions: &mut ColumnExtensions, + ) -> Result { + if parser.parse_keyword(Keyword::FULLTEXT) { + ensure!( + column_extensions.fulltext_options.is_none(), + InvalidColumnOptionSnafu { + name: column_name.to_string(), + msg: "duplicated FULLTEXT option", + } + ); + + let column_type = get_unalias_type(column_type); + let data_type = sql_data_type_to_concrete_data_type(&column_type)?; + ensure!( + data_type == ConcreteDataType::string_datatype(), + InvalidColumnOptionSnafu { + name: column_name.to_string(), + msg: "FULLTEXT index only supports string type", + } + ); + + let options = parser + .parse_options(Keyword::WITH) + .context(error::SyntaxSnafu)? + .into_iter() + .map(parse_option_string) + .collect::>>()?; + + for key in options.keys() { + ensure!( + validate_column_fulltext_option(key), + InvalidColumnOptionSnafu { + name: column_name.to_string(), + msg: format!("invalid FULLTEXT option: {key}"), + } + ); + } + + column_extensions.fulltext_options = Some(options.into()); + Ok(true) + } else { + Ok(false) + } + } + fn parse_optional_table_constraint(&mut self) -> Result> { let name = if self.parser.parse_keyword(Keyword::CONSTRAINT) { let raw_name = self.parse_identifier().context(SyntaxSnafu)?; @@ -742,7 +814,7 @@ impl<'a> ParserContext<'a> { } } -fn validate_time_index(columns: &[ColumnDef], constraints: &[TableConstraint]) -> Result<()> { +fn validate_time_index(columns: &[Column], constraints: &[TableConstraint]) -> Result<()> { let time_index_constraints: Vec<_> = constraints .iter() .filter_map(|c| { @@ -786,7 +858,7 @@ fn validate_time_index(columns: &[ColumnDef], constraints: &[TableConstraint]) - let time_index_column_ident = &time_index_constraints[0][0]; let time_index_column = columns .iter() - .find(|c| c.name.value == *time_index_column_ident.value) + .find(|c| c.name().value == *time_index_column_ident.value) .with_context(|| InvalidTimeIndexSnafu { msg: format!( "time index column {} not found in columns", @@ -794,11 +866,11 @@ fn validate_time_index(columns: &[ColumnDef], constraints: &[TableConstraint]) - ), })?; - let time_index_data_type = get_real_timestamp_type(&time_index_column.data_type); + let time_index_data_type = get_unalias_type(time_index_column.data_type()); ensure!( matches!(time_index_data_type, DataType::Timestamp(_, _)), InvalidColumnOptionSnafu { - name: time_index_column.name.to_string(), + name: time_index_column.name().to_string(), msg: "time index column data type should be timestamp", } ); @@ -806,7 +878,7 @@ fn validate_time_index(columns: &[ColumnDef], constraints: &[TableConstraint]) - Ok(()) } -fn get_real_timestamp_type(data_type: &DataType) -> DataType { +fn get_unalias_type(data_type: &DataType) -> DataType { match data_type { DataType::Custom(name, tokens) if name.0.len() == 1 && tokens.is_empty() => { if let Some(real_type) = get_data_type_by_alias_name(name.0[0].value.as_str()) { @@ -819,7 +891,7 @@ fn get_real_timestamp_type(data_type: &DataType) -> DataType { } } -fn validate_partitions(columns: &[ColumnDef], partitions: &Partitions) -> Result<()> { +fn validate_partitions(columns: &[Column], partitions: &Partitions) -> Result<()> { let partition_columns = ensure_partition_columns_defined(columns, partitions)?; ensure_exprs_are_binary(&partitions.exprs, &partition_columns)?; @@ -828,7 +900,7 @@ fn validate_partitions(columns: &[ColumnDef], partitions: &Partitions) -> Result } /// Ensure all exprs are binary expr and all the columns are defined in the column list. -fn ensure_exprs_are_binary(exprs: &[Expr], columns: &[&ColumnDef]) -> Result<()> { +fn ensure_exprs_are_binary(exprs: &[Expr], columns: &[&Column]) -> Result<()> { for expr in exprs { // The first level must be binary expr if let Expr::BinaryOp { left, op: _, right } = expr { @@ -847,7 +919,7 @@ fn ensure_exprs_are_binary(exprs: &[Expr], columns: &[&ColumnDef]) -> Result<()> /// Check if the expr is a binary expr, an ident or a literal value. /// If is ident, then check it is in the column list. /// This recursive function is intended to be used by [ensure_exprs_are_binary]. -fn ensure_one_expr(expr: &Expr, columns: &[&ColumnDef]) -> Result<()> { +fn ensure_one_expr(expr: &Expr, columns: &[&Column]) -> Result<()> { match expr { Expr::BinaryOp { left, op: _, right } => { ensure_one_expr(left, columns)?; @@ -857,7 +929,7 @@ fn ensure_one_expr(expr: &Expr, columns: &[&ColumnDef]) -> Result<()> { Expr::Identifier(ident) => { let column_name = &ident.value; ensure!( - columns.iter().any(|c| &c.name.value == column_name), + columns.iter().any(|c| &c.name().value == column_name), error::InvalidSqlSnafu { msg: format!( "Column {:?} in rule expr is not referenced in PARTITION ON!", @@ -877,9 +949,9 @@ fn ensure_one_expr(expr: &Expr, columns: &[&ColumnDef]) -> Result<()> { /// Ensure that all columns used in "PARTITION ON COLUMNS" are defined in create table. fn ensure_partition_columns_defined<'a>( - columns: &'a [ColumnDef], + columns: &'a [Column], partitions: &'a Partitions, -) -> Result> { +) -> Result> { partitions .column_list .iter() @@ -888,12 +960,12 @@ fn ensure_partition_columns_defined<'a>( // a linear search to find the target every time is fine. columns .iter() - .find(|c| &c.name == x) + .find(|c| c.name() == x) .context(error::InvalidSqlSnafu { msg: format!("Partition column {:?} not defined!", x.value), }) }) - .collect::>>() + .collect::>>() } #[cfg(test)] @@ -1036,10 +1108,10 @@ mod tests { assert_eq!(c.options, options.into()); let columns = &c.columns; - assert_column_def(&columns[0], "host", "STRING"); - assert_column_def(&columns[1], "ts", "TIMESTAMP"); - assert_column_def(&columns[2], "cpu", "FLOAT"); - assert_column_def(&columns[3], "memory", "FLOAT64"); + assert_column_def(&columns[0].column_def, "host", "STRING"); + assert_column_def(&columns[1].column_def, "ts", "TIMESTAMP"); + assert_column_def(&columns[2].column_def, "cpu", "FLOAT"); + assert_column_def(&columns[3].column_def, "memory", "FLOAT64"); let constraints = &c.constraints; assert!(matches!(&constraints[0], TableConstraint::Unique { @@ -1450,8 +1522,8 @@ ENGINE=mito"; assert_eq!(result.len(), 1); if let Statement::CreateTable(c) = &result[0] { let ts = c.columns[2].clone(); - assert_eq!(ts.name.to_string(), "ts"); - assert_eq!(ts.options[0].option, NotNull); + assert_eq!(ts.name().to_string(), "ts"); + assert_eq!(ts.options()[0].option, NotNull); } else { panic!("should be create table statement"); } @@ -1561,9 +1633,9 @@ ENGINE=mito"; _ => panic!("should be time index constraint"), } let ts = c.columns[2].clone(); - assert_eq!(ts.name.to_string(), "ts"); - assert!(matches!(ts.options[0].option, ColumnOption::Default(..))); - assert_eq!(ts.options[1].option, NotNull); + assert_eq!(ts.name().to_string(), "ts"); + assert!(matches!(ts.options()[0].option, ColumnOption::Default(..))); + assert_eq!(ts.options()[1].option, NotNull); } else { unreachable!("should be create table statement"); } @@ -1657,10 +1729,10 @@ ENGINE=mito"; assert_eq!("mito", c.engine); assert_eq!(4, c.columns.len()); let columns = &c.columns; - assert_column_def(&columns[0], "host", "STRING"); - assert_column_def(&columns[1], "ts", "TIMESTAMP"); - assert_column_def(&columns[2], "cpu", "FLOAT"); - assert_column_def(&columns[3], "memory", "FLOAT64"); + assert_column_def(&columns[0].column_def, "host", "STRING"); + assert_column_def(&columns[1].column_def, "ts", "TIMESTAMP"); + assert_column_def(&columns[2].column_def, "cpu", "FLOAT"); + assert_column_def(&columns[3].column_def, "memory", "FLOAT64"); let constraints = &c.constraints; assert!(matches!(&constraints[0], TableConstraint::Unique { @@ -1804,4 +1876,133 @@ non TIMESTAMP(6) TIME INDEX, assert!(result.is_err()); assert_matches!(result, Err(crate::error::Error::Syntax { .. })); } + + #[test] + fn test_parse_create_table_fulltext_options() { + let sql1 = r" +CREATE TABLE log ( + ts TIMESTAMP TIME INDEX, + msg TEXT FULLTEXT, +)"; + let result1 = ParserContext::create_with_dialect( + sql1, + &GreptimeDbDialect {}, + ParseOptions::default(), + ) + .unwrap(); + + if let Statement::CreateTable(c) = &result1[0] { + c.columns.iter().for_each(|col| { + if col.name().value == "msg" { + assert!(col.extensions.fulltext_options.as_ref().unwrap().is_empty()); + } + }); + } else { + panic!("should be create_table statement"); + } + + let sql2 = r" +CREATE TABLE log ( + ts TIMESTAMP TIME INDEX, + msg STRING FULLTEXT WITH (analyzer='English', case_sensitive='false') +)"; + let result2 = ParserContext::create_with_dialect( + sql2, + &GreptimeDbDialect {}, + ParseOptions::default(), + ) + .unwrap(); + + if let Statement::CreateTable(c) = &result2[0] { + c.columns.iter().for_each(|col| { + if col.name().value == "msg" { + let options = col.extensions.fulltext_options.as_ref().unwrap(); + assert_eq!(options.len(), 2); + assert_eq!(options.get("analyzer").unwrap(), "English"); + assert_eq!(options.get("case_sensitive").unwrap(), "false"); + } + }); + } else { + panic!("should be create_table statement"); + } + + let sql3 = r" +CREATE TABLE log ( + ts TIMESTAMP TIME INDEX, + msg1 TINYTEXT FULLTEXT WITH (analyzer='English', case_sensitive='false'), + msg2 CHAR(20) FULLTEXT WITH (analyzer='Chinese', case_sensitive='true') +)"; + let result3 = ParserContext::create_with_dialect( + sql3, + &GreptimeDbDialect {}, + ParseOptions::default(), + ) + .unwrap(); + + if let Statement::CreateTable(c) = &result3[0] { + c.columns.iter().for_each(|col| { + if col.name().value == "msg1" { + let options = col.extensions.fulltext_options.as_ref().unwrap(); + assert_eq!(options.len(), 2); + assert_eq!(options.get("analyzer").unwrap(), "English"); + assert_eq!(options.get("case_sensitive").unwrap(), "false"); + } else if col.name().value == "msg2" { + let options = col.extensions.fulltext_options.as_ref().unwrap(); + assert_eq!(options.len(), 2); + assert_eq!(options.get("analyzer").unwrap(), "Chinese"); + assert_eq!(options.get("case_sensitive").unwrap(), "true"); + } + }); + } else { + panic!("should be create_table statement"); + } + } + + #[test] + fn test_parse_create_table_fulltext_options_invalid_type() { + let sql = r" +CREATE TABLE log ( + ts TIMESTAMP TIME INDEX, + msg INT FULLTEXT, +)"; + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("FULLTEXT index only supports string type")); + } + + #[test] + fn test_parse_create_table_fulltext_options_duplicate() { + let sql = r" +CREATE TABLE log ( + ts TIMESTAMP TIME INDEX, + msg STRING FULLTEXT WITH (analyzer='English', analyzer='Chinese') FULLTEXT WITH (case_sensitive='false') +)"; + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("duplicated FULLTEXT option")); + } + + #[test] + fn test_parse_create_table_fulltext_options_invalid_option() { + let sql = r" +CREATE TABLE log ( + ts TIMESTAMP TIME INDEX, + msg STRING FULLTEXT WITH (analyzer='English', invalid_option='Chinese') +)"; + let result = + ParserContext::create_with_dialect(sql, &GreptimeDbDialect {}, ParseOptions::default()); + assert!(result.is_err()); + assert!(result + .unwrap_err() + .to_string() + .contains("invalid FULLTEXT option")); + } } diff --git a/src/sql/src/statements.rs b/src/sql/src/statements.rs index 4d27453cefd1..ccf93861d809 100644 --- a/src/sql/src/statements.rs +++ b/src/sql/src/statements.rs @@ -43,10 +43,8 @@ use datatypes::schema::constraint::{CURRENT_TIMESTAMP, CURRENT_TIMESTAMP_FN}; use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, COMMENT_KEY}; use datatypes::types::{cast, TimestampType}; use datatypes::value::{OrderedF32, OrderedF64, Value}; -pub use option_map::OptionMap; use snafu::{ensure, OptionExt, ResultExt}; use sqlparser::ast::{ExactNumberInfo, UnaryOperator}; -pub use transform::{get_data_type_by_alias_name, transform_statements}; use crate::ast::{ ColumnDef, ColumnOption, ColumnOptionDef, DataType as SqlDataType, Expr, TimezoneInfo, @@ -55,8 +53,12 @@ use crate::ast::{ use crate::error::{ self, ColumnTypeMismatchSnafu, ConvertSqlValueSnafu, ConvertToGrpcDataTypeSnafu, ConvertValueSnafu, InvalidCastSnafu, InvalidSqlValueSnafu, ParseSqlValueSnafu, Result, - SerializeColumnDefaultConstraintSnafu, TimestampOverflowSnafu, UnsupportedDefaultValueSnafu, + SerializeColumnDefaultConstraintSnafu, SetFulltextOptionSnafu, TimestampOverflowSnafu, + UnsupportedDefaultValueSnafu, }; +use crate::statements::create::Column; +pub use crate::statements::option_map::OptionMap; +pub use crate::statements::transform::{get_data_type_by_alias_name, transform_statements}; fn parse_string_to_value( column_name: &str, @@ -353,31 +355,31 @@ pub fn has_primary_key_option(column_def: &ColumnDef) -> bool { // TODO(yingwen): Make column nullable by default, and checks invalid case like // a column is not nullable but has a default value null. -/// Create a `ColumnSchema` from `ColumnDef`. -pub fn column_def_to_schema( - column_def: &ColumnDef, +/// Create a `ColumnSchema` from `Column`. +pub fn column_to_schema( + column: &Column, is_time_index: bool, timezone: Option<&Timezone>, ) -> Result { - let is_nullable = column_def - .options + let is_nullable = column + .options() .iter() .all(|o| !matches!(o.option, ColumnOption::NotNull)) && !is_time_index; - let name = column_def.name.value.clone(); - let data_type = sql_data_type_to_concrete_data_type(&column_def.data_type)?; + let name = column.name().value.clone(); + let data_type = sql_data_type_to_concrete_data_type(column.data_type())?; let default_constraint = - parse_column_default_constraint(&name, &data_type, &column_def.options, timezone)?; + parse_column_default_constraint(&name, &data_type, column.options(), timezone)?; let mut column_schema = ColumnSchema::new(name, data_type, is_nullable) .with_time_index(is_time_index) .with_default_constraint(default_constraint) .context(error::InvalidDefaultSnafu { - column: &column_def.name.value, + column: &column.name().value, })?; - if let Some(ColumnOption::Comment(c)) = column_def.options.iter().find_map(|o| { + if let Some(ColumnOption::Comment(c)) = column.options().iter().find_map(|o| { if matches!(o.option, ColumnOption::Comment(_)) { Some(&o.option) } else { @@ -389,6 +391,12 @@ pub fn column_def_to_schema( .insert(COMMENT_KEY.to_string(), c.to_string()); } + if let Some(options) = column.extensions.build_fulltext_options()? { + column_schema = column_schema + .with_fulltext_options(options) + .context(SetFulltextOptionSnafu)?; + } + Ok(column_schema) } @@ -439,6 +447,7 @@ pub fn sql_column_def_to_grpc_column_def( semantic_type: semantic_type as _, comment: String::new(), datatype_extension: datatype_ext, + options: None, }) } @@ -555,16 +564,20 @@ pub fn sql_location_to_grpc_add_column_location( #[cfg(test)] mod tests { use std::assert_matches::assert_matches; + use std::collections::HashMap; use api::v1::ColumnDataType; use common_time::timestamp::TimeUnit; use common_time::timezone::set_default_timezone; + use datatypes::schema::FulltextAnalyzer; use datatypes::types::BooleanType; use datatypes::value::OrderedFloat; use super::*; use crate::ast::TimezoneInfo; + use crate::statements::create::ColumnExtensions; use crate::statements::ColumnOption; + use crate::{COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE}; fn check_type(sql_type: SqlDataType, data_type: ConcreteDataType) { assert_eq!( @@ -1074,15 +1087,18 @@ mod tests { } #[test] - pub fn test_column_def_to_schema() { - let column_def = ColumnDef { - name: "col".into(), - data_type: SqlDataType::Double, - collation: None, - options: vec![], + pub fn test_column_to_schema() { + let column_def = Column { + column_def: ColumnDef { + name: "col".into(), + data_type: SqlDataType::Double, + collation: None, + options: vec![], + }, + extensions: ColumnExtensions::default(), }; - let column_schema = column_def_to_schema(&column_def, false, None).unwrap(); + let column_schema = column_to_schema(&column_def, false, None).unwrap(); assert_eq!("col", column_schema.name); assert_eq!( @@ -1092,7 +1108,7 @@ mod tests { assert!(column_schema.is_nullable()); assert!(!column_schema.is_time_index()); - let column_schema = column_def_to_schema(&column_def, true, None).unwrap(); + let column_schema = column_to_schema(&column_def, true, None).unwrap(); assert_eq!("col", column_schema.name); assert_eq!( @@ -1102,23 +1118,26 @@ mod tests { assert!(!column_schema.is_nullable()); assert!(column_schema.is_time_index()); - let column_def = ColumnDef { - name: "col2".into(), - data_type: SqlDataType::String(None), - collation: None, - options: vec![ - ColumnOptionDef { - name: None, - option: ColumnOption::NotNull, - }, - ColumnOptionDef { - name: None, - option: ColumnOption::Comment("test comment".to_string()), - }, - ], + let column_def = Column { + column_def: ColumnDef { + name: "col2".into(), + data_type: SqlDataType::String(None), + collation: None, + options: vec![ + ColumnOptionDef { + name: None, + option: ColumnOption::NotNull, + }, + ColumnOptionDef { + name: None, + option: ColumnOption::Comment("test comment".to_string()), + }, + ], + }, + extensions: ColumnExtensions::default(), }; - let column_schema = column_def_to_schema(&column_def, false, None).unwrap(); + let column_schema = column_to_schema(&column_def, false, None).unwrap(); assert_eq!("col2", column_schema.name); assert_eq!(ConcreteDataType::string_datatype(), column_schema.data_type); @@ -1131,24 +1150,27 @@ mod tests { } #[test] - pub fn test_column_def_to_schema_timestamp_with_timezone() { - let column_def = ColumnDef { - name: "col".into(), - // MILLISECOND - data_type: SqlDataType::Timestamp(Some(3), TimezoneInfo::None), - collation: None, - options: vec![ColumnOptionDef { - name: None, - option: ColumnOption::Default(Expr::Value(SqlValue::SingleQuotedString( - "2024-01-30T00:01:01".to_string(), - ))), - }], + pub fn test_column_to_schema_timestamp_with_timezone() { + let column = Column { + column_def: ColumnDef { + name: "col".into(), + // MILLISECOND + data_type: SqlDataType::Timestamp(Some(3), TimezoneInfo::None), + collation: None, + options: vec![ColumnOptionDef { + name: None, + option: ColumnOption::Default(Expr::Value(SqlValue::SingleQuotedString( + "2024-01-30T00:01:01".to_string(), + ))), + }], + }, + extensions: ColumnExtensions::default(), }; // with timezone "Asia/Shanghai" - let column_schema = column_def_to_schema( - &column_def, + let column_schema = column_to_schema( + &column, false, Some(&Timezone::from_tz_string("Asia/Shanghai").unwrap()), ) @@ -1168,7 +1190,7 @@ mod tests { ); // without timezone - let column_schema = column_def_to_schema(&column_def, false, None).unwrap(); + let column_schema = column_to_schema(&column, false, None).unwrap(); assert_eq!("col", column_schema.name); assert_eq!( @@ -1184,6 +1206,40 @@ mod tests { ); } + #[test] + fn test_column_to_schema_with_fulltext() { + let column = Column { + column_def: ColumnDef { + name: "col".into(), + data_type: SqlDataType::Text, + collation: None, + options: vec![], + }, + extensions: ColumnExtensions { + fulltext_options: Some( + HashMap::from_iter([ + ( + COLUMN_FULLTEXT_OPT_KEY_ANALYZER.to_string(), + "English".to_string(), + ), + ( + COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE.to_string(), + "true".to_string(), + ), + ]) + .into(), + ), + }, + }; + + let column_schema = column_to_schema(&column, false, None).unwrap(); + assert_eq!("col", column_schema.name); + assert_eq!(ConcreteDataType::string_datatype(), column_schema.data_type); + let fulltext_options = column_schema.fulltext_options().unwrap().unwrap(); + assert_eq!(fulltext_options.analyzer, FulltextAnalyzer::English); + assert!(fulltext_options.case_sensitive); + } + #[test] pub fn test_parse_placeholder_value() { assert!(sql_value_to_value( diff --git a/src/sql/src/statements/create.rs b/src/sql/src/statements/create.rs index 6d43aebca713..cafe70ec2fef 100644 --- a/src/sql/src/statements/create.rs +++ b/src/sql/src/statements/create.rs @@ -15,13 +15,16 @@ use std::fmt::{Display, Formatter}; use common_catalog::consts::FILE_ENGINE; +use datatypes::schema::{FulltextAnalyzer, FulltextOptions}; use itertools::Itertools; -use sqlparser::ast::{Expr, Query}; +use sqlparser::ast::{ColumnOptionDef, DataType, Expr, Query}; use sqlparser_derive::{Visit, VisitMut}; use crate::ast::{ColumnDef, Ident, ObjectName, TableConstraint, Value as SqlValue}; +use crate::error::{FulltextInvalidOptionSnafu, Result}; use crate::statements::statement::Statement; use crate::statements::OptionMap; +use crate::{COLUMN_FULLTEXT_OPT_KEY_ANALYZER, COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE}; const LINE_SEP: &str = ",\n"; const COMMA_SEP: &str = ", "; @@ -83,7 +86,7 @@ pub struct CreateTable { pub table_id: u32, /// Table name pub name: ObjectName, - pub columns: Vec, + pub columns: Vec, pub engine: String, pub constraints: Vec, /// Table options in `WITH`. All keys are lowercase. @@ -91,6 +94,98 @@ pub struct CreateTable { pub partitions: Option, } +/// Column definition in `CREATE TABLE` statement. +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] +pub struct Column { + /// `ColumnDef` from `sqlparser::ast` + pub column_def: ColumnDef, + /// Column extensions for greptimedb dialect. + pub extensions: ColumnExtensions, +} + +/// Column extensions for greptimedb dialect. +#[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut, Default)] +pub struct ColumnExtensions { + /// Fulltext options. + pub fulltext_options: Option, +} + +impl Column { + pub fn name(&self) -> &Ident { + &self.column_def.name + } + + pub fn data_type(&self) -> &DataType { + &self.column_def.data_type + } + + pub fn mut_data_type(&mut self) -> &mut DataType { + &mut self.column_def.data_type + } + + pub fn options(&self) -> &[ColumnOptionDef] { + &self.column_def.options + } + + pub fn mut_options(&mut self) -> &mut Vec { + &mut self.column_def.options + } +} + +impl Display for Column { + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.column_def)?; + if let Some(fulltext_options) = &self.extensions.fulltext_options { + if !fulltext_options.is_empty() { + let options = fulltext_options.kv_pairs(); + write!(f, " FULLTEXT WITH({})", format_list_comma!(options))?; + } else { + write!(f, " FULLTEXT")?; + } + } + Ok(()) + } +} + +impl ColumnExtensions { + pub fn build_fulltext_options(&self) -> Result> { + let Some(options) = self.fulltext_options.as_ref() else { + return Ok(None); + }; + + let mut fulltext = FulltextOptions { + enable: true, + ..Default::default() + }; + if let Some(analyzer) = options.get(COLUMN_FULLTEXT_OPT_KEY_ANALYZER) { + match analyzer.to_ascii_lowercase().as_str() { + "english" => fulltext.analyzer = FulltextAnalyzer::English, + "chinese" => fulltext.analyzer = FulltextAnalyzer::Chinese, + _ => { + return FulltextInvalidOptionSnafu { + msg: format!("{analyzer}, expected: 'English' | 'Chinese'"), + } + .fail(); + } + } + } + if let Some(case_sensitive) = options.get(COLUMN_FULLTEXT_OPT_KEY_CASE_SENSITIVE) { + match case_sensitive.to_ascii_lowercase().as_str() { + "true" => fulltext.case_sensitive = true, + "false" => fulltext.case_sensitive = false, + _ => { + return FulltextInvalidOptionSnafu { + msg: format!("{case_sensitive}, expected: 'true' | 'false'"), + } + .fail(); + } + } + } + + Ok(Some(fulltext)) + } +} + #[derive(Debug, PartialEq, Eq, Clone, Visit, VisitMut)] pub struct Partitions { pub column_list: Vec, @@ -201,7 +296,7 @@ impl Display for CreateDatabase { pub struct CreateExternalTable { /// Table name pub name: ObjectName, - pub columns: Vec, + pub columns: Vec, pub constraints: Vec, /// Table options in `WITH`. All keys are lowercase. pub options: OptionMap, diff --git a/src/sql/src/statements/transform/type_alias.rs b/src/sql/src/statements/transform/type_alias.rs index 7209656d942f..dac5b627645a 100644 --- a/src/sql/src/statements/transform/type_alias.rs +++ b/src/sql/src/statements/transform/type_alias.rs @@ -16,7 +16,7 @@ use std::ops::ControlFlow; use datatypes::data_type::DataType as GreptimeDataType; use sqlparser::ast::{ - ColumnDef, DataType, Expr, Function, FunctionArg, FunctionArgExpr, Ident, ObjectName, Value, + DataType, Expr, Function, FunctionArg, FunctionArgExpr, Ident, ObjectName, Value, }; use crate::error::Result; @@ -45,12 +45,12 @@ impl TransformRule for TypeAliasTransformRule { Statement::CreateTable(CreateTable { columns, .. }) => { columns .iter_mut() - .for_each(|ColumnDef { data_type, .. }| replace_type_alias(data_type)); + .for_each(|column| replace_type_alias(column.mut_data_type())); } Statement::CreateExternalTable(CreateExternalTable { columns, .. }) => { columns .iter_mut() - .for_each(|ColumnDef { data_type, .. }| replace_type_alias(data_type)); + .for_each(|column| replace_type_alias(column.mut_data_type())); } Statement::Alter(alter_table) => { if let AlterTableOperation::ChangeColumnType { target_type, .. } = diff --git a/src/store-api/src/metadata.rs b/src/store-api/src/metadata.rs index a41565b2f1ff..a94879675e5b 100644 --- a/src/store-api/src/metadata.rs +++ b/src/store-api/src/metadata.rs @@ -21,14 +21,14 @@ use std::collections::{HashMap, HashSet}; use std::fmt; use std::sync::Arc; -use api::helper::ColumnDataTypeWrapper; +use api::v1::column_def::try_as_column_schema; use api::v1::region::RegionColumnDef; -use api::v1::{ColumnDef, SemanticType}; +use api::v1::SemanticType; use common_error::ext::ErrorExt; use common_error::status_code::StatusCode; use common_macro::stack_trace_debug; use datatypes::arrow::datatypes::FieldRef; -use datatypes::schema::{ColumnDefaultConstraint, ColumnSchema, Schema, SchemaRef}; +use datatypes::schema::{ColumnSchema, Schema, SchemaRef}; use serde::de::Error; use serde::{Deserialize, Deserializer, Serialize}; use snafu::{ensure, Location, OptionExt, ResultExt, Snafu}; @@ -61,26 +61,6 @@ impl fmt::Debug for ColumnMetadata { } impl ColumnMetadata { - fn inner_try_from_column_def(column_def: ColumnDef) -> Result { - let default_constrain = if column_def.default_constraint.is_empty() { - None - } else { - Some( - ColumnDefaultConstraint::try_from(column_def.default_constraint.as_slice()) - .context(ConvertDatatypesSnafu)?, - ) - }; - let data_type = ColumnDataTypeWrapper::new( - column_def.data_type(), - column_def.datatype_extension.clone(), - ) - .into(); - ColumnSchema::new(&column_def.name, data_type, column_def.is_nullable) - .with_time_index(column_def.semantic_type() == SemanticType::Timestamp) - .with_default_constraint(default_constrain) - .context(ConvertDatatypesSnafu) - } - /// Construct `Self` from protobuf struct [RegionColumnDef] pub fn try_from_column_def(column_def: RegionColumnDef) -> Result { let column_id = column_def.column_id; @@ -90,7 +70,7 @@ impl ColumnMetadata { err: "column_def is absent", })?; let semantic_type = column_def.semantic_type(); - let column_schema = Self::inner_try_from_column_def(column_def)?; + let column_schema = try_as_column_schema(&column_def).context(ConvertColumnSchemaSnafu)?; Ok(Self { column_schema, @@ -714,13 +694,6 @@ pub enum MetadataError { error: serde_json::Error, }, - #[snafu(display("Failed to convert struct from datatypes"))] - ConvertDatatypes { - #[snafu(implicit)] - location: Location, - source: datatypes::error::Error, - }, - #[snafu(display("Invalid raw region request, err: {}", err))] InvalidRawRegionRequest { err: String, @@ -758,6 +731,13 @@ pub enum MetadataError { #[snafu(implicit)] location: Location, }, + + #[snafu(display("Failed to convert column schema"))] + ConvertColumnSchema { + source: api::error::Error, + #[snafu(implicit)] + location: Location, + }, } impl ErrorExt for MetadataError { diff --git a/tests/cases/standalone/common/create/create_with_fulltext.result b/tests/cases/standalone/common/create/create_with_fulltext.result new file mode 100644 index 000000000000..fd1ae5777ad9 --- /dev/null +++ b/tests/cases/standalone/common/create/create_with_fulltext.result @@ -0,0 +1,101 @@ +CREATE TABLE log ( + ts TIMESTAMP TIME INDEX, + msg STRING FULLTEXT, +); + +Affected Rows: 0 + +SHOW CREATE TABLE log; + ++-------+------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------+------------------------------------------------------------------------------------+ +| log | CREATE TABLE IF NOT EXISTS "log" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "msg" STRING NULL FULLTEXT WITH(analyzer = 'English', case_sensitive = 'false'), | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++-------+------------------------------------------------------------------------------------+ + +DROP TABLE log; + +Affected Rows: 0 + +CREATE TABLE log_with_opts ( + ts TIMESTAMP TIME INDEX, + msg TEXT FULLTEXT WITH (analyzer='English', case_sensitive='true'), +); + +Affected Rows: 0 + +SHOW CREATE TABLE log_with_opts; + ++---------------+-----------------------------------------------------------------------------------+ +| Table | Create Table | ++---------------+-----------------------------------------------------------------------------------+ +| log_with_opts | CREATE TABLE IF NOT EXISTS "log_with_opts" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "msg" STRING NULL FULLTEXT WITH(analyzer = 'English', case_sensitive = 'true'), | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++---------------+-----------------------------------------------------------------------------------+ + +DROP TABLE log_with_opts; + +Affected Rows: 0 + +CREATE TABLE log_multi_fulltext_cols ( + ts TIMESTAMP TIME INDEX, + msg TINYTEXT FULLTEXT, + msg2 VARCHAR FULLTEXT, +); + +Affected Rows: 0 + +SHOW CREATE TABLE log_multi_fulltext_cols; + ++-------------------------+-------------------------------------------------------------------------------------+ +| Table | Create Table | ++-------------------------+-------------------------------------------------------------------------------------+ +| log_multi_fulltext_cols | CREATE TABLE IF NOT EXISTS "log_multi_fulltext_cols" ( | +| | "ts" TIMESTAMP(3) NOT NULL, | +| | "msg" STRING NULL FULLTEXT WITH(analyzer = 'English', case_sensitive = 'false'), | +| | "msg2" STRING NULL FULLTEXT WITH(analyzer = 'English', case_sensitive = 'false'), | +| | TIME INDEX ("ts") | +| | ) | +| | | +| | ENGINE=mito | +| | | ++-------------------------+-------------------------------------------------------------------------------------+ + +DROP TABLE log_multi_fulltext_cols; + +Affected Rows: 0 + +CREATE TABLE log_dup_fulltext_opts ( + ts TIMESTAMP TIME INDEX, + msg TEXT FULLTEXT FULLTEXT, +); + +Error: 1004(InvalidArguments), Invalid column option, column name: msg, error: duplicated FULLTEXT option + +CREATE TABLE log_with_invalid_type ( + ts TIMESTAMP TIME INDEX, + msg INT FULLTEXT, +); + +Error: 1004(InvalidArguments), Invalid column option, column name: msg, error: FULLTEXT index only supports string type + +CREATE TABLE log_with_invalid_option ( + ts TIMESTAMP TIME INDEX, + msg TEXT FULLTEXT WITH (analyzer='English', invalid_option='true'), +); + +Error: 1004(InvalidArguments), Invalid column option, column name: msg, error: invalid FULLTEXT option: invalid_option + diff --git a/tests/cases/standalone/common/create/create_with_fulltext.sql b/tests/cases/standalone/common/create/create_with_fulltext.sql new file mode 100644 index 000000000000..27e2328e8d8c --- /dev/null +++ b/tests/cases/standalone/common/create/create_with_fulltext.sql @@ -0,0 +1,45 @@ +CREATE TABLE log ( + ts TIMESTAMP TIME INDEX, + msg STRING FULLTEXT, +); + +SHOW CREATE TABLE log; + +DROP TABLE log; + + +CREATE TABLE log_with_opts ( + ts TIMESTAMP TIME INDEX, + msg TEXT FULLTEXT WITH (analyzer='English', case_sensitive='true'), +); + +SHOW CREATE TABLE log_with_opts; + +DROP TABLE log_with_opts; + + +CREATE TABLE log_multi_fulltext_cols ( + ts TIMESTAMP TIME INDEX, + msg TINYTEXT FULLTEXT, + msg2 VARCHAR FULLTEXT, +); + +SHOW CREATE TABLE log_multi_fulltext_cols; + +DROP TABLE log_multi_fulltext_cols; + + +CREATE TABLE log_dup_fulltext_opts ( + ts TIMESTAMP TIME INDEX, + msg TEXT FULLTEXT FULLTEXT, +); + +CREATE TABLE log_with_invalid_type ( + ts TIMESTAMP TIME INDEX, + msg INT FULLTEXT, +); + +CREATE TABLE log_with_invalid_option ( + ts TIMESTAMP TIME INDEX, + msg TEXT FULLTEXT WITH (analyzer='English', invalid_option='true'), +);