From c5e7355fdc1e76adc1d8d00b2174bd95b58d8061 Mon Sep 17 00:00:00 2001 From: baishen Date: Mon, 27 Jan 2025 22:29:25 +0800 Subject: [PATCH] feat(query): Virtual column support alias name (#17365) * feat(query): Virtual column support alias name * fix * fix machete * add tests * fix tests * fix comments * fix --- Cargo.lock | 1 - src/meta/api/src/schema_api_impl.rs | 2 + src/meta/api/src/schema_api_test_suite.rs | 331 +++++++------ src/meta/app/src/schema/mod.rs | 1 + src/meta/app/src/schema/virtual_column.rs | 51 +- src/meta/proto-conv/src/util.rs | 3 +- .../virtual_column_from_to_protobuf_impl.rs | 65 +-- src/meta/proto-conv/tests/it/main.rs | 1 + .../tests/it/v041_virtual_column.rs | 21 +- .../tests/it/v112_virtual_column.rs | 30 +- .../tests/it/v119_virtual_column.rs | 83 ++++ src/meta/protos/proto/virtual_column.proto | 96 ++-- .../ast/src/ast/statements/virtual_column.rs | 21 +- src/query/ast/src/parser/statement.rs | 16 +- src/query/ast/tests/it/parser.rs | 4 +- src/query/ast/tests/it/testdata/stmt.txt | 452 ++++++++++-------- .../fuse/operations/virtual_columns.rs | 22 +- .../virtual_column/virtual_column_handler.rs | 4 +- .../fuse/operations/virtual_columns.rs | 30 +- .../ee_features/virtual_column/Cargo.toml | 1 - .../virtual_column/src/virtual_column.rs | 6 +- .../interpreter_table_modify_column.rs | 2 + .../interpreter_virtual_column_alter.rs | 1 + .../interpreter_virtual_column_create.rs | 1 + .../it/pipelines/filter/random_filter_expr.rs | 2 +- .../sql/src/planner/binder/bind_context.rs | 30 +- .../planner/binder/bind_query/bind_select.rs | 12 +- .../binder/bind_table_reference/bind_table.rs | 117 ++++- .../sql/src/planner/binder/column_binding.rs | 17 +- .../src/planner/binder/ddl/virtual_column.rs | 63 ++- src/query/sql/src/planner/binder/project.rs | 6 +- src/query/sql/src/planner/binder/table.rs | 4 +- .../sql/src/planner/expression_parser.rs | 4 +- src/query/sql/src/planner/metadata.rs | 6 +- .../aggregate/normalize_aggregate.rs | 2 +- 
.../rewrite/rule_push_down_filter_scan.rs | 2 +- .../rewrite/rule_push_down_filter_union.rs | 2 +- .../statistics/collect_statistics.rs | 4 +- .../src/planner/plans/ddl/virtual_column.rs | 10 +- .../sql/src/planner/semantic/type_check.rs | 95 +--- .../system/src/virtual_columns_table.rs | 19 +- ..._0002_ddl_create_drop_virtual_columns.test | 17 +- .../standalone/ee/explain_virtual_column.test | 35 +- 43 files changed, 1082 insertions(+), 610 deletions(-) create mode 100644 src/meta/proto-conv/tests/it/v119_virtual_column.rs diff --git a/Cargo.lock b/Cargo.lock index 7c76d54d7e2b7..e1207714a3ad3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4804,7 +4804,6 @@ dependencies = [ "databend-common-base", "databend-common-catalog", "databend-common-exception", - "databend-common-expression", "databend-common-meta-app", "databend-common-pipeline-core", "databend-common-storages-fuse", diff --git a/src/meta/api/src/schema_api_impl.rs b/src/meta/api/src/schema_api_impl.rs index b2924ec21bf56..dab3ab9ebf9b5 100644 --- a/src/meta/api/src/schema_api_impl.rs +++ b/src/meta/api/src/schema_api_impl.rs @@ -932,6 +932,7 @@ impl + ?Sized> SchemaApi for KV { virtual_columns: req.virtual_columns.clone(), created_on: Utc::now(), updated_on: None, + auto_generated: req.auto_generated, }; self.insert_name_value_with_create_option( @@ -963,6 +964,7 @@ impl + ?Sized> SchemaApi for KV { |mut meta| { meta.virtual_columns = req.virtual_columns.clone(); meta.updated_on = Some(Utc::now()); + meta.auto_generated = req.auto_generated; Some((meta, None)) }, not_found, diff --git a/src/meta/api/src/schema_api_test_suite.rs b/src/meta/api/src/schema_api_test_suite.rs index e9e49ad72c67f..131fe8e5b1da0 100644 --- a/src/meta/api/src/schema_api_test_suite.rs +++ b/src/meta/api/src/schema_api_test_suite.rs @@ -126,6 +126,7 @@ use databend_common_meta_app::schema::UpdateVirtualColumnReq; use databend_common_meta_app::schema::UpsertTableCopiedFileReq; use 
databend_common_meta_app::schema::UpsertTableOptionReq; use databend_common_meta_app::schema::VirtualColumnIdent; +use databend_common_meta_app::schema::VirtualField; use databend_common_meta_app::tenant::Tenant; use databend_common_meta_app::tenant::ToTenant; use databend_common_meta_app::KeyWithTenant; @@ -6631,25 +6632,30 @@ impl SchemaApiTestSuite { create_option: CreateOption::Create, name_ident: name_ident.clone(), virtual_columns: vec![ - ( - "variant:k1".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)), - ), - ( - "variant[1]".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)), - ), - ( - "variant:k1:k2".to_string(), - TableDataType::Nullable(Box::new(TableDataType::String)), - ), - ( - "variant:k1:k3".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Number( + VirtualField { + expr: "variant:k1".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None, + }, + VirtualField { + expr: "variant[1]".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None, + }, + VirtualField { + expr: "variant:k1:k2".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::String)), + alias_name: None, + }, + VirtualField { + expr: "variant:k1:k3".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Number( NumberDataType::UInt64, ))), - ), + alias_name: None, + }, ], + auto_generated: false, }; mt.create_virtual_column(req.clone()).await?; @@ -6659,25 +6665,30 @@ impl SchemaApiTestSuite { create_option: CreateOption::Create, name_ident: name_ident.clone(), virtual_columns: vec![ - ( - "variant:k1".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)), - ), - ( - "variant[1]".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)), - ), - ( - "variant:k1:k2".to_string(), - TableDataType::Nullable(Box::new(TableDataType::String)), - ), - ( - 
"variant:k1:k3".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Number( + VirtualField { + expr: "variant:k1".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None, + }, + VirtualField { + expr: "variant[1]".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None, + }, + VirtualField { + expr: "variant:k1:k2".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::String)), + alias_name: None, + }, + VirtualField { + expr: "variant:k1:k3".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Number( NumberDataType::UInt64, ))), - ), + alias_name: None, + }, ], + auto_generated: false, }; let res = mt.create_virtual_column(req).await; @@ -6691,24 +6702,28 @@ impl SchemaApiTestSuite { let res = mt.list_virtual_columns(req).await?; assert_eq!(1, res.len()); assert_eq!(res[0].virtual_columns, vec![ - ( - "variant:k1".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)) - ), - ( - "variant[1]".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)) - ), - ( - "variant:k1:k2".to_string(), - TableDataType::Nullable(Box::new(TableDataType::String)), - ), - ( - "variant:k1:k3".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Number( + VirtualField { + expr: "variant:k1".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None + }, + VirtualField { + expr: "variant[1]".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None + }, + VirtualField { + expr: "variant:k1:k2".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::String)), + alias_name: None + }, + VirtualField { + expr: "variant:k1:k3".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Number( NumberDataType::UInt64 ))), - ), + alias_name: None + }, ]); let req = 
ListVirtualColumnsReq::new(&tenant, Some(u64::MAX)); @@ -6723,25 +6738,30 @@ impl SchemaApiTestSuite { if_exists: false, name_ident: name_ident.clone(), virtual_columns: vec![ - ( - "variant:k2".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)), - ), - ( - "variant[2]".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)), - ), - ( - "variant:k2:k3".to_string(), - TableDataType::Nullable(Box::new(TableDataType::String)), - ), - ( - "variant:k2:k4".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Number( + VirtualField { + expr: "variant:k2".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None, + }, + VirtualField { + expr: "variant[2]".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None, + }, + VirtualField { + expr: "variant:k2:k3".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::String)), + alias_name: None, + }, + VirtualField { + expr: "variant:k2:k4".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Number( NumberDataType::UInt64, ))), - ), + alias_name: None, + }, ], + auto_generated: false, }; mt.update_virtual_column(req).await?; @@ -6754,24 +6774,28 @@ impl SchemaApiTestSuite { let res = mt.list_virtual_columns(req).await?; assert_eq!(1, res.len()); assert_eq!(res[0].virtual_columns, vec![ - ( - "variant:k2".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)) - ), - ( - "variant[2]".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)) - ), - ( - "variant:k2:k3".to_string(), - TableDataType::Nullable(Box::new(TableDataType::String)), - ), - ( - "variant:k2:k4".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Number( + VirtualField { + expr: "variant:k2".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None + }, + VirtualField { + expr: 
"variant[2]".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None + }, + VirtualField { + expr: "variant:k2:k3".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::String)), + alias_name: None + }, + VirtualField { + expr: "variant:k2:k4".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Number( NumberDataType::UInt64 ))), - ), + alias_name: None + }, ]); } @@ -6799,25 +6823,30 @@ impl SchemaApiTestSuite { if_exists: false, name_ident: name_ident.clone(), virtual_columns: vec![ - ( - "variant:k3".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)), - ), - ( - "variant[3]".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)), - ), - ( - "variant:k3:k4".to_string(), - TableDataType::Nullable(Box::new(TableDataType::String)), - ), - ( - "variant:k3:k5".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Number( + VirtualField { + expr: "variant:k3".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None, + }, + VirtualField { + expr: "variant[3]".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None, + }, + VirtualField { + expr: "variant:k3:k4".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::String)), + alias_name: None, + }, + VirtualField { + expr: "variant:k3:k5".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Number( NumberDataType::UInt64, ))), - ), + alias_name: None, + }, ], + auto_generated: false, }; let res = mt.update_virtual_column(req).await; @@ -6830,25 +6859,30 @@ impl SchemaApiTestSuite { create_option: CreateOption::Create, name_ident: name_ident.clone(), virtual_columns: vec![ - ( - "variant:k1".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)), - ), - ( - "variant[1]".to_string(), - 
TableDataType::Nullable(Box::new(TableDataType::Variant)), - ), - ( - "variant:k1:k4".to_string(), - TableDataType::Nullable(Box::new(TableDataType::String)), - ), - ( - "variant:k1:k5".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Number( + VirtualField { + expr: "variant:k1".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None, + }, + VirtualField { + expr: "variant[1]".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None, + }, + VirtualField { + expr: "variant:k1:k4".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::String)), + alias_name: None, + }, + VirtualField { + expr: "variant:k1:k5".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Number( NumberDataType::UInt64, ))), - ), + alias_name: None, + }, ], + auto_generated: false, }; mt.create_virtual_column(req.clone()).await?; @@ -6858,39 +6892,46 @@ impl SchemaApiTestSuite { let res = mt.list_virtual_columns(req).await?; assert_eq!(1, res.len()); assert_eq!(res[0].virtual_columns, vec![ - ( - "variant:k1".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)) - ), - ( - "variant[1]".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)) - ), - ( - "variant:k1:k4".to_string(), - TableDataType::Nullable(Box::new(TableDataType::String)), - ), - ( - "variant:k1:k5".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Number( + VirtualField { + expr: "variant:k1".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None + }, + VirtualField { + expr: "variant[1]".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None + }, + VirtualField { + expr: "variant:k1:k4".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::String)), + alias_name: None + }, + VirtualField { + expr: 
"variant:k1:k5".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Number( NumberDataType::UInt64 ))), - ), + alias_name: None + }, ]); let req = CreateVirtualColumnReq { create_option: CreateOption::CreateOrReplace, name_ident: name_ident.clone(), virtual_columns: vec![ - ( - "variant:k2".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)), - ), - ( - "variant:k3".to_string(), - TableDataType::Nullable(Box::new(TableDataType::String)), - ), + VirtualField { + expr: "variant:k2".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None, + }, + VirtualField { + expr: "variant:k3".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::String)), + alias_name: None, + }, ], + auto_generated: false, }; mt.create_virtual_column(req.clone()).await?; @@ -6900,14 +6941,16 @@ impl SchemaApiTestSuite { let res = mt.list_virtual_columns(req).await?; assert_eq!(1, res.len()); assert_eq!(res[0].virtual_columns, vec![ - ( - "variant:k2".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)) - ), - ( - "variant:k3".to_string(), - TableDataType::Nullable(Box::new(TableDataType::String)), - ) + VirtualField { + expr: "variant:k2".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None + }, + VirtualField { + expr: "variant:k3".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::String)), + alias_name: None + }, ]); } diff --git a/src/meta/app/src/schema/mod.rs b/src/meta/app/src/schema/mod.rs index 88a1999860edb..5a441c3c2e5f8 100644 --- a/src/meta/app/src/schema/mod.rs +++ b/src/meta/app/src/schema/mod.rs @@ -144,4 +144,5 @@ pub use virtual_column::DropVirtualColumnReq; pub use virtual_column::ListVirtualColumnsReq; pub use virtual_column::UpdateVirtualColumnReq; pub use virtual_column::VirtualColumnMeta; +pub use virtual_column::VirtualField; pub use 
virtual_column_ident::VirtualColumnIdent; diff --git a/src/meta/app/src/schema/virtual_column.rs b/src/meta/app/src/schema/virtual_column.rs index 70c5665301639..c8a8d8f9644ad 100644 --- a/src/meta/app/src/schema/virtual_column.rs +++ b/src/meta/app/src/schema/virtual_column.rs @@ -26,20 +26,62 @@ use crate::schema::virtual_column_ident::VirtualColumnIdent; use crate::tenant::Tenant; use crate::tenant::ToTenant; -#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, Eq, PartialEq)] +// The virtual field column definition of Variant type. +#[derive(Clone, Debug, Eq, PartialEq)] +pub struct VirtualField { + // Expression that extracts the internal virtual field of the variant value. + // for example: + // `data['key']`, `data[0]`, `data['key1']['key2']`, .. + pub expr: String, + // The data type of internal virtual field. + // If all the rows of a virtual field have the same type, + // the virtual field can be cast to that type. + pub data_type: TableDataType, + // Optional alias name. + pub alias_name: Option, +} + +impl Display for VirtualField { + fn fmt(&self, f: &mut Formatter) -> fmt::Result { + if let Some(alias_name) = &self.alias_name { + write!( + f, + "{}::{} AS {}", + self.expr, + self.data_type.remove_nullable(), + alias_name + ) + } else { + write!(f, "{}::{}", self.expr, self.data_type.remove_nullable()) + } + } +} + +#[derive(Clone, Debug, Eq, PartialEq)] pub struct VirtualColumnMeta { pub table_id: MetaId, - pub virtual_columns: Vec<(String, TableDataType)>, + // The internal virtual field columns of Variant type. + // For example, the data column has the following values: + // `{"id":1,"name":"tom","metas":{"key1":"val1","key2":"val2"}}` + // `{"id":2,"name":"alice","metas":{"key1":"val3","key2":"val4"}}` + // ... 
+ // We can generate virtual columns as follows: + // `data['id']`, `data['name']`, `data['metas']['key1']`, `data['metas']['key2']` + pub virtual_columns: Vec, pub created_on: DateTime, pub updated_on: Option>, + // Whether the virtual columns are auto-generated, + // true for auto-generated, false for user-defined. + pub auto_generated: bool, } #[derive(Clone, Debug, PartialEq, Eq)] pub struct CreateVirtualColumnReq { pub create_option: CreateOption, pub name_ident: VirtualColumnIdent, - pub virtual_columns: Vec<(String, TableDataType)>, + pub virtual_columns: Vec, + pub auto_generated: bool, } impl Display for CreateVirtualColumnReq { @@ -57,7 +99,8 @@ impl Display for CreateVirtualColumnReq { pub struct UpdateVirtualColumnReq { pub if_exists: bool, pub name_ident: VirtualColumnIdent, - pub virtual_columns: Vec<(String, TableDataType)>, + pub virtual_columns: Vec, + pub auto_generated: bool, } impl Display for UpdateVirtualColumnReq { diff --git a/src/meta/proto-conv/src/util.rs b/src/meta/proto-conv/src/util.rs index 4c8c8db4e656b..0baed0196fc8d 100644 --- a/src/meta/proto-conv/src/util.rs +++ b/src/meta/proto-conv/src/util.rs @@ -147,7 +147,8 @@ const META_CHANGE_LOG: &[(u64, &str)] = &[ (115, "2024-12-16: Add: udf.proto: add UDAFScript and UDAFServer"), (116, "2025-01-09: Add: MarkedDeletedIndexMeta"), (117, "2025-01-21: Add: config.proto: add disable_list_batch in WebhdfsConfig"), - (118, "2025-01-22: Add: config.proto: add user_name in WebhdfsConfig") + (118, "2025-01-22: Add: config.proto: add user_name in WebhdfsConfig"), + (119, "2025-01-25: Add: virtual_column add alias_names and auto_generated field"), // Dear developer: // If you're gonna add a new metadata version, you'll have to add a test for it. 
// You could just copy an existing test file(e.g., `../tests/it/v024_table_meta.rs`) diff --git a/src/meta/proto-conv/src/virtual_column_from_to_protobuf_impl.rs b/src/meta/proto-conv/src/virtual_column_from_to_protobuf_impl.rs index 034affca046f7..311faa39b8927 100644 --- a/src/meta/proto-conv/src/virtual_column_from_to_protobuf_impl.rs +++ b/src/meta/proto-conv/src/virtual_column_from_to_protobuf_impl.rs @@ -15,6 +15,8 @@ //! This mod is the key point about compatibility. //! Everytime update anything in this file, update the `VER` and let the tests pass. +use std::collections::BTreeMap; + use chrono::DateTime; use chrono::Utc; use databend_common_expression::TableDataType; @@ -37,30 +39,28 @@ impl FromToProto for mt::VirtualColumnMeta { fn from_pb(p: Self::PB) -> Result where Self: Sized { reader_check_msg(p.ver, p.min_reader_ver)?; - let virtual_columns = if p.data_types.is_empty() { - p.virtual_columns - .iter() - .map(|v| { - ( - v.clone(), - TableDataType::Nullable(Box::new(TableDataType::Variant)), - ) - }) - .collect() - } else { - if p.virtual_columns.len() != p.data_types.len() { - return Err(Incompatible::new(format!( - "Incompatible virtual columns length is {}, but data types length is {}", - p.virtual_columns.len(), - p.data_types.len() - ))); - } - let mut virtual_columns = Vec::new(); - for (v, ty) in p.virtual_columns.iter().zip(p.data_types.iter()) { - virtual_columns.push((v.clone(), TableDataType::from_pb(ty.clone())?)); - } - virtual_columns - }; + if !p.data_types.is_empty() && p.virtual_columns.len() != p.data_types.len() { + return Err(Incompatible::new(format!( + "Incompatible virtual columns length is {}, but data types length is {}", + p.virtual_columns.len(), + p.data_types.len() + ))); + } + let mut virtual_columns = Vec::with_capacity(p.virtual_columns.len()); + for (i, expr) in p.virtual_columns.iter().enumerate() { + let data_type = if let Some(ty) = p.data_types.get(i) { + TableDataType::from_pb(ty.clone())? 
+ } else { + TableDataType::Nullable(Box::new(TableDataType::Variant)) + }; + let alias_name = p.alias_names.get(&(i as u64)).cloned(); + let virtual_column = mt::VirtualField { + expr: expr.clone(), + data_type, + alias_name, + }; + virtual_columns.push(virtual_column); + } let v = Self { table_id: p.table_id, @@ -70,16 +70,21 @@ impl FromToProto for mt::VirtualColumnMeta { Some(updated_on) => Some(DateTime::::from_pb(updated_on)?), None => None, }, + auto_generated: p.auto_generated, }; Ok(v) } fn to_pb(&self) -> Result { - let mut data_types = Vec::new(); - let mut virtual_columns = Vec::new(); - for (v, ty) in self.virtual_columns.iter() { - data_types.push(ty.to_pb()?); - virtual_columns.push(v.clone()); + let mut data_types = Vec::with_capacity(self.virtual_columns.len()); + let mut virtual_columns = Vec::with_capacity(self.virtual_columns.len()); + let mut alias_names = BTreeMap::new(); + for (i, virtual_field) in self.virtual_columns.iter().enumerate() { + data_types.push(virtual_field.data_type.to_pb()?); + virtual_columns.push(virtual_field.expr.clone()); + if let Some(alias_name) = &virtual_field.alias_name { + alias_names.insert(i as u64, alias_name.clone()); + } } let p = pb::VirtualColumnMeta { ver: VER, @@ -92,6 +97,8 @@ impl FromToProto for mt::VirtualColumnMeta { None => None, }, data_types, + alias_names, + auto_generated: self.auto_generated, }; Ok(p) } diff --git a/src/meta/proto-conv/tests/it/main.rs b/src/meta/proto-conv/tests/it/main.rs index aadf166f7041b..70009ddb71a65 100644 --- a/src/meta/proto-conv/tests/it/main.rs +++ b/src/meta/proto-conv/tests/it/main.rs @@ -116,3 +116,4 @@ mod v115_add_udaf_script; mod v116_marked_deleted_index_meta; mod v117_webhdfs_add_disable_list_batch; mod v118_webhdfs_add_user_name; +mod v119_virtual_column; diff --git a/src/meta/proto-conv/tests/it/v041_virtual_column.rs b/src/meta/proto-conv/tests/it/v041_virtual_column.rs index e3719ce580c6c..dcc5beabea945 100644 --- 
a/src/meta/proto-conv/tests/it/v041_virtual_column.rs +++ b/src/meta/proto-conv/tests/it/v041_virtual_column.rs @@ -16,6 +16,7 @@ use chrono::TimeZone; use chrono::Utc; use databend_common_expression::TableDataType; use databend_common_meta_app::schema::VirtualColumnMeta; +use databend_common_meta_app::schema::VirtualField; use fastrace::func_name; use crate::common; @@ -42,23 +43,27 @@ fn test_decode_v41_virtual_column() -> anyhow::Result<()> { let want = || { let table_id = 7; let virtual_columns = vec![ - ( - "v:k1:k2".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)), - ), - ( - "v[1][2]".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)), - ), + VirtualField { + expr: "v:k1:k2".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None, + }, + VirtualField { + expr: "v[1][2]".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None, + }, ]; let created_on = Utc.with_ymd_and_hms(2023, 3, 9, 10, 0, 0).unwrap(); let updated_on = Some(Utc.with_ymd_and_hms(2023, 5, 29, 10, 0, 0).unwrap()); + let auto_generated = false; VirtualColumnMeta { table_id, virtual_columns, created_on, updated_on, + auto_generated, } }; diff --git a/src/meta/proto-conv/tests/it/v112_virtual_column.rs b/src/meta/proto-conv/tests/it/v112_virtual_column.rs index dce9eadd58622..2b3dc6aaf037c 100644 --- a/src/meta/proto-conv/tests/it/v112_virtual_column.rs +++ b/src/meta/proto-conv/tests/it/v112_virtual_column.rs @@ -16,6 +16,7 @@ use chrono::TimeZone; use chrono::Utc; use databend_common_expression::TableDataType; use databend_common_meta_app::schema::VirtualColumnMeta; +use databend_common_meta_app::schema::VirtualField; use fastrace::func_name; use crate::common; @@ -45,27 +46,32 @@ fn test_decode_v112_virtual_column() -> anyhow::Result<()> { let want = || { let table_id = 7; let virtual_columns = vec![ - ( - "v:k1:k2".to_string(), - 
TableDataType::Nullable(Box::new(TableDataType::Variant)), - ), - ( - "v[1][2]".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)), - ), - ( - "v:k3:k4".to_string(), - TableDataType::Nullable(Box::new(TableDataType::String)), - ), + VirtualField { + expr: "v:k1:k2".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None, + }, + VirtualField { + expr: "v[1][2]".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None, + }, + VirtualField { + expr: "v:k3:k4".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::String)), + alias_name: None, + }, ]; let created_on = Utc.with_ymd_and_hms(2023, 3, 9, 10, 0, 0).unwrap(); let updated_on = Some(Utc.with_ymd_and_hms(2023, 5, 29, 10, 0, 0).unwrap()); + let auto_generated = false; VirtualColumnMeta { table_id, virtual_columns, created_on, updated_on, + auto_generated, } }; diff --git a/src/meta/proto-conv/tests/it/v119_virtual_column.rs b/src/meta/proto-conv/tests/it/v119_virtual_column.rs new file mode 100644 index 0000000000000..cb02639890261 --- /dev/null +++ b/src/meta/proto-conv/tests/it/v119_virtual_column.rs @@ -0,0 +1,83 @@ +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +use chrono::TimeZone; +use chrono::Utc; +use databend_common_expression::TableDataType; +use databend_common_meta_app::schema::VirtualColumnMeta; +use databend_common_meta_app::schema::VirtualField; +use fastrace::func_name; + +use crate::common; + +// These bytes are built when a new version is introduced, +// and are kept for backward compatibility test. +// +// ************************************************************* +// * These messages should never be updated, * +// * only be added when a new version is added, * +// * or be removed when an old version is no longer supported. * +// ************************************************************* +// +// The message bytes are built from the output of `proto_conv::test_build_pb_buf()` +#[test] +fn test_decode_v119_virtual_column() -> anyhow::Result<()> { + let schema_v119 = vec![ + 8, 7, 18, 7, 118, 58, 107, 49, 58, 107, 50, 18, 7, 118, 91, 49, 93, 91, 50, 93, 18, 7, 118, + 58, 107, 51, 58, 107, 52, 26, 23, 50, 48, 50, 51, 45, 48, 51, 45, 48, 57, 32, 49, 48, 58, + 48, 48, 58, 48, 48, 32, 85, 84, 67, 34, 23, 50, 48, 50, 51, 45, 48, 53, 45, 50, 57, 32, 49, + 48, 58, 48, 48, 58, 48, 48, 32, 85, 84, 67, 42, 18, 178, 2, 9, 210, 2, 0, 160, 6, 119, 168, + 6, 24, 160, 6, 119, 168, 6, 24, 42, 18, 178, 2, 9, 210, 2, 0, 160, 6, 119, 168, 6, 24, 160, + 6, 119, 168, 6, 24, 42, 18, 178, 2, 9, 146, 2, 0, 160, 6, 119, 168, 6, 24, 160, 6, 119, + 168, 6, 24, 50, 10, 8, 1, 18, 6, 118, 97, 108, 117, 101, 49, 50, 10, 8, 2, 18, 6, 118, 97, + 108, 117, 101, 50, 56, 1, 160, 6, 119, 168, 6, 24, + ]; + + let want = || { + let table_id = 7; + let virtual_columns = vec![ + VirtualField { + expr: "v:k1:k2".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None, + }, + VirtualField { + expr: "v[1][2]".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: Some("value1".to_string()), + }, + VirtualField { + expr: "v:k3:k4".to_string(), + 
data_type: TableDataType::Nullable(Box::new(TableDataType::String)), + alias_name: Some("value2".to_string()), + }, + ]; + let created_on = Utc.with_ymd_and_hms(2023, 3, 9, 10, 0, 0).unwrap(); + let updated_on = Some(Utc.with_ymd_and_hms(2023, 5, 29, 10, 0, 0).unwrap()); + let auto_generated = true; + + VirtualColumnMeta { + table_id, + virtual_columns, + created_on, + updated_on, + auto_generated, + } + }; + + common::test_pb_from_to(func_name!(), want())?; + common::test_load_old(func_name!(), schema_v119.as_slice(), 119, want())?; + + Ok(()) +} diff --git a/src/meta/protos/proto/virtual_column.proto b/src/meta/protos/proto/virtual_column.proto index 92692f9e2c3b1..4cf4e56ac71ea 100644 --- a/src/meta/protos/proto/virtual_column.proto +++ b/src/meta/protos/proto/virtual_column.proto @@ -1,44 +1,52 @@ -// Copyright 2021 Datafuse Labs -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -// The identifier of a database by name. Names can be changed. -// There is no guarantee that two get-database request by name will return the -// same instance. - -syntax = "proto3"; - -package databend_proto; - -import "datatype.proto"; - -// VirtualColumnMeta is a container of virtual columns information. -message VirtualColumnMeta { - uint64 ver = 100; - uint64 min_reader_ver = 101; - - // The table_id virtual columns belong to. - uint64 table_id = 1; - - // Exprs of each virtual columns. - repeated string virtual_columns = 2; - - // The time virtual column created. 
- string created_on = 3; - - // The time virtual column updated. - optional string updated_on = 4; - - // virtual column data type - repeated DataType data_types = 5; -} +// Copyright 2021 Datafuse Labs +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// The identifier of a database by name. Names can be changed. +// There is no guarantee that two get-database request by name will return the +// same instance. + +syntax = "proto3"; + +package databend_proto; + +import "datatype.proto"; + +// VirtualColumnMeta is a container of virtual columns information. +message VirtualColumnMeta { + uint64 ver = 100; + uint64 min_reader_ver = 101; + + // The table_id virtual columns belong to. + uint64 table_id = 1; + + // Exprs of each virtual columns. + repeated string virtual_columns = 2; + + // The time virtual column created. + string created_on = 3; + + // The time virtual column updated. + optional string updated_on = 4; + + // virtual column data type + repeated DataType data_types = 5; + + // virtual column alias names, + // key is the index of `virtual_columns` field. + map alias_names = 6; + + // whether the virtual columns are auto-generated, + // true for auto-generated, false for user-defined. 
+ bool auto_generated = 7; +} diff --git a/src/query/ast/src/ast/statements/virtual_column.rs b/src/query/ast/src/ast/statements/virtual_column.rs index 17e5a4d466c66..38415521c4c87 100644 --- a/src/query/ast/src/ast/statements/virtual_column.rs +++ b/src/query/ast/src/ast/statements/virtual_column.rs @@ -25,6 +25,23 @@ use crate::ast::Expr; use crate::ast::Identifier; use crate::ast::ShowLimit; +#[derive(Debug, Clone, PartialEq, Drive, DriveMut)] +pub struct VirtualColumn { + pub expr: Box, + pub alias: Option, +} + +impl Display for VirtualColumn { + fn fmt(&self, f: &mut Formatter) -> std::fmt::Result { + if let Some(alias) = &self.alias { + write!(f, "{} AS {}", self.expr, alias)?; + } else { + write!(f, "{}", self.expr)?; + } + Ok(()) + } +} + #[derive(Debug, Clone, PartialEq, Drive, DriveMut)] pub struct CreateVirtualColumnStmt { pub create_option: CreateOption, @@ -32,7 +49,7 @@ pub struct CreateVirtualColumnStmt { pub database: Option, pub table: Identifier, - pub virtual_columns: Vec, + pub virtual_columns: Vec, } impl Display for CreateVirtualColumnStmt { @@ -66,7 +83,7 @@ pub struct AlterVirtualColumnStmt { pub database: Option, pub table: Identifier, - pub virtual_columns: Vec, + pub virtual_columns: Vec, } impl Display for AlterVirtualColumnStmt { diff --git a/src/query/ast/src/parser/statement.rs b/src/query/ast/src/parser/statement.rs index c31f71f691d4a..1ba7e31c9b554 100644 --- a/src/query/ast/src/parser/statement.rs +++ b/src/query/ast/src/parser/statement.rs @@ -1413,7 +1413,7 @@ pub fn statement_body(i: Input) -> IResult { ~ ( OR ~ ^REPLACE )? ~ VIRTUAL ~ COLUMN ~ ( IF ~ ^NOT ~ ^EXISTS )? - ~ ^"(" ~ ^#comma_separated_list1(expr) ~ ^")" + ~ ^"(" ~ ^#comma_separated_list1(virtual_column) ~ ^")" ~ FOR ~ #dot_separated_idents_1_to_3 }, |( @@ -1442,7 +1442,7 @@ pub fn statement_body(i: Input) -> IResult { let alter_virtual_column = map( rule! { - ALTER ~ VIRTUAL ~ COLUMN ~ ( IF ~ ^EXISTS )? 
~ ^"(" ~ ^#comma_separated_list1(expr) ~ ^")" ~ FOR ~ #dot_separated_idents_1_to_3 + ALTER ~ VIRTUAL ~ COLUMN ~ ( IF ~ ^EXISTS )? ~ ^"(" ~ ^#comma_separated_list1(virtual_column) ~ ^")" ~ FOR ~ #dot_separated_idents_1_to_3 }, |(_, _, _, opt_if_exists, _, virtual_columns, _, _, (catalog, database, table))| { Statement::AlterVirtualColumn(AlterVirtualColumnStmt { @@ -4849,3 +4849,15 @@ pub fn alter_notification_options(i: Input) -> IResult |opts| opts, )(i) } + +pub fn virtual_column(i: Input) -> IResult { + map( + rule! { + #expr ~ #alias_name? + }, + |(expr, alias)| VirtualColumn { + expr: Box::new(expr), + alias, + }, + )(i) +} diff --git a/src/query/ast/tests/it/parser.rs b/src/query/ast/tests/it/parser.rs index d5127655652d8..e43e3a3c2581b 100644 --- a/src/query/ast/tests/it/parser.rs +++ b/src/query/ast/tests/it/parser.rs @@ -615,8 +615,8 @@ fn test_statement() { r#"DESC MASKING POLICY email_mask"#, r#"DROP MASKING POLICY IF EXISTS email_mask"#, r#"CREATE VIRTUAL COLUMN (a['k1']['k2'], b[0][1]) FOR t"#, - r#"CREATE OR REPLACE VIRTUAL COLUMN (a['k1']['k2'], b[0][1]) FOR t"#, - r#"ALTER VIRTUAL COLUMN (a['k1']['k2'], b[0][1]) FOR t"#, + r#"CREATE OR REPLACE VIRTUAL COLUMN (a['k1']['k2']::string as v1, b[0][1]::int as v2) FOR t"#, + r#"ALTER VIRTUAL COLUMN (a['k1']['k2'] as v1, b[0][1] as v2) FOR t"#, r#"DROP VIRTUAL COLUMN FOR t"#, r#"REFRESH VIRTUAL COLUMN FOR t"#, r#"CREATE NETWORK POLICY mypolicy ALLOWED_IP_LIST=('192.168.10.0/24') BLOCKED_IP_LIST=('192.168.10.99') COMMENT='test'"#, diff --git a/src/query/ast/tests/it/testdata/stmt.txt b/src/query/ast/tests/it/testdata/stmt.txt index 4f8cfea435e4d..c5f0f9103b0a9 100644 --- a/src/query/ast/tests/it/testdata/stmt.txt +++ b/src/query/ast/tests/it/testdata/stmt.txt @@ -19511,103 +19511,109 @@ CreateVirtualColumn( ident_type: None, }, virtual_columns: [ - MapAccess { - span: Some( - 30..36, - ), + VirtualColumn { expr: MapAccess { span: Some( - 24..30, + 30..36, ), - expr: ColumnRef { + expr: MapAccess { span: 
Some( - 23..24, + 24..30, ), - column: ColumnRef { - database: None, - table: None, - column: Name( - Identifier { - span: Some( - 23..24, - ), - name: "a", - quote: None, - ident_type: None, - }, + expr: ColumnRef { + span: Some( + 23..24, ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 23..24, + ), + name: "a", + quote: None, + ident_type: None, + }, + ), + }, + }, + accessor: Bracket { + key: Literal { + span: Some( + 25..29, + ), + value: String( + "k1", + ), + }, }, }, accessor: Bracket { key: Literal { span: Some( - 25..29, + 31..35, ), value: String( - "k1", + "k2", ), }, }, }, - accessor: Bracket { - key: Literal { - span: Some( - 31..35, - ), - value: String( - "k2", - ), - }, - }, + alias: None, }, - MapAccess { - span: Some( - 42..45, - ), + VirtualColumn { expr: MapAccess { span: Some( - 39..42, + 42..45, ), - expr: ColumnRef { + expr: MapAccess { span: Some( - 38..39, + 39..42, ), - column: ColumnRef { - database: None, - table: None, - column: Name( - Identifier { - span: Some( - 38..39, - ), - name: "b", - quote: None, - ident_type: None, - }, + expr: ColumnRef { + span: Some( + 38..39, ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 38..39, + ), + name: "b", + quote: None, + ident_type: None, + }, + ), + }, + }, + accessor: Bracket { + key: Literal { + span: Some( + 40..41, + ), + value: UInt64( + 0, + ), + }, }, }, accessor: Bracket { key: Literal { span: Some( - 40..41, + 43..44, ), value: UInt64( - 0, + 1, ), }, }, }, - accessor: Bracket { - key: Literal { - span: Some( - 43..44, - ), - value: UInt64( - 1, - ), - }, - }, + alias: None, }, ], }, @@ -19615,9 +19621,9 @@ CreateVirtualColumn( ---------- Input ---------- -CREATE OR REPLACE VIRTUAL COLUMN (a['k1']['k2'], b[0][1]) FOR t +CREATE OR REPLACE VIRTUAL COLUMN (a['k1']['k2']::string as v1, b[0][1]::int as v2) FOR t ---------- Output --------- -CREATE OR REPLACE VIRTUAL COLUMN 
(a['k1']['k2'], b[0][1]) FOR t +CREATE OR REPLACE VIRTUAL COLUMN (a['k1']['k2']::STRING AS v1, b[0][1]::Int32 AS v2) FOR t ---------- AST ------------ CreateVirtualColumn( CreateVirtualColumnStmt { @@ -19626,110 +19632,148 @@ CreateVirtualColumn( database: None, table: Identifier { span: Some( - 62..63, + 87..88, ), name: "t", quote: None, ident_type: None, }, virtual_columns: [ - MapAccess { - span: Some( - 41..47, - ), - expr: MapAccess { + VirtualColumn { + expr: Cast { span: Some( - 35..41, + 47..55, ), - expr: ColumnRef { + expr: MapAccess { span: Some( - 34..35, + 41..47, ), - column: ColumnRef { - database: None, - table: None, - column: Name( - Identifier { + expr: MapAccess { + span: Some( + 35..41, + ), + expr: ColumnRef { + span: Some( + 34..35, + ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 34..35, + ), + name: "a", + quote: None, + ident_type: None, + }, + ), + }, + }, + accessor: Bracket { + key: Literal { span: Some( - 34..35, + 36..40, + ), + value: String( + "k1", ), - name: "a", - quote: None, - ident_type: None, }, - ), + }, }, - }, - accessor: Bracket { - key: Literal { - span: Some( - 36..40, - ), - value: String( - "k1", - ), + accessor: Bracket { + key: Literal { + span: Some( + 42..46, + ), + value: String( + "k2", + ), + }, }, }, + target_type: String, + pg_style: true, }, - accessor: Bracket { - key: Literal { + alias: Some( + Identifier { span: Some( - 42..46, - ), - value: String( - "k2", + 59..61, ), + name: "v1", + quote: None, + ident_type: None, }, - }, - }, - MapAccess { - span: Some( - 53..56, ), - expr: MapAccess { + }, + VirtualColumn { + expr: Cast { span: Some( - 50..53, + 70..75, ), - expr: ColumnRef { + expr: MapAccess { span: Some( - 49..50, + 67..70, ), - column: ColumnRef { - database: None, - table: None, - column: Name( - Identifier { + expr: MapAccess { + span: Some( + 64..67, + ), + expr: ColumnRef { + span: Some( + 63..64, + ), + column: ColumnRef { + 
database: None, + table: None, + column: Name( + Identifier { + span: Some( + 63..64, + ), + name: "b", + quote: None, + ident_type: None, + }, + ), + }, + }, + accessor: Bracket { + key: Literal { span: Some( - 49..50, + 65..66, + ), + value: UInt64( + 0, ), - name: "b", - quote: None, - ident_type: None, }, - ), + }, }, - }, - accessor: Bracket { - key: Literal { - span: Some( - 51..52, - ), - value: UInt64( - 0, - ), + accessor: Bracket { + key: Literal { + span: Some( + 68..69, + ), + value: UInt64( + 1, + ), + }, }, }, + target_type: Int32, + pg_style: true, }, - accessor: Bracket { - key: Literal { + alias: Some( + Identifier { span: Some( - 54..55, - ), - value: UInt64( - 1, + 79..81, ), + name: "v2", + quote: None, + ident_type: None, }, - }, + ), }, ], }, @@ -19737,9 +19781,9 @@ CreateVirtualColumn( ---------- Input ---------- -ALTER VIRTUAL COLUMN (a['k1']['k2'], b[0][1]) FOR t +ALTER VIRTUAL COLUMN (a['k1']['k2'] as v1, b[0][1] as v2) FOR t ---------- Output --------- -ALTER VIRTUAL COLUMN (a['k1']['k2'], b[0][1]) FOR t +ALTER VIRTUAL COLUMN (a['k1']['k2'] AS v1, b[0][1] AS v2) FOR t ---------- AST ------------ AlterVirtualColumn( AlterVirtualColumnStmt { @@ -19748,110 +19792,134 @@ AlterVirtualColumn( database: None, table: Identifier { span: Some( - 50..51, + 62..63, ), name: "t", quote: None, ident_type: None, }, virtual_columns: [ - MapAccess { - span: Some( - 29..35, - ), + VirtualColumn { expr: MapAccess { span: Some( - 23..29, + 29..35, ), - expr: ColumnRef { + expr: MapAccess { span: Some( - 22..23, + 23..29, ), - column: ColumnRef { - database: None, - table: None, - column: Name( - Identifier { - span: Some( - 22..23, - ), - name: "a", - quote: None, - ident_type: None, - }, + expr: ColumnRef { + span: Some( + 22..23, ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 22..23, + ), + name: "a", + quote: None, + ident_type: None, + }, + ), + }, + }, + accessor: Bracket { + key: Literal { + 
span: Some( + 24..28, + ), + value: String( + "k1", + ), + }, }, }, accessor: Bracket { key: Literal { span: Some( - 24..28, + 30..34, ), value: String( - "k1", + "k2", ), }, }, }, - accessor: Bracket { - key: Literal { + alias: Some( + Identifier { span: Some( - 30..34, - ), - value: String( - "k2", + 39..41, ), + name: "v1", + quote: None, + ident_type: None, }, - }, - }, - MapAccess { - span: Some( - 41..44, ), + }, + VirtualColumn { expr: MapAccess { span: Some( - 38..41, + 47..50, ), - expr: ColumnRef { + expr: MapAccess { span: Some( - 37..38, + 44..47, ), - column: ColumnRef { - database: None, - table: None, - column: Name( - Identifier { - span: Some( - 37..38, - ), - name: "b", - quote: None, - ident_type: None, - }, + expr: ColumnRef { + span: Some( + 43..44, ), + column: ColumnRef { + database: None, + table: None, + column: Name( + Identifier { + span: Some( + 43..44, + ), + name: "b", + quote: None, + ident_type: None, + }, + ), + }, + }, + accessor: Bracket { + key: Literal { + span: Some( + 45..46, + ), + value: UInt64( + 0, + ), + }, }, }, accessor: Bracket { key: Literal { span: Some( - 39..40, + 48..49, ), value: UInt64( - 0, + 1, ), }, }, }, - accessor: Bracket { - key: Literal { + alias: Some( + Identifier { span: Some( - 42..43, - ), - value: UInt64( - 1, + 54..56, ), + name: "v2", + quote: None, + ident_type: None, }, - }, + ), }, ], }, diff --git a/src/query/ee/src/storages/fuse/operations/virtual_columns.rs b/src/query/ee/src/storages/fuse/operations/virtual_columns.rs index 705c87663251f..2b070e0b4fe34 100644 --- a/src/query/ee/src/storages/fuse/operations/virtual_columns.rs +++ b/src/query/ee/src/storages/fuse/operations/virtual_columns.rs @@ -37,6 +37,7 @@ use databend_common_expression::TableSchemaRef; use databend_common_expression::TableSchemaRefExt; use databend_common_functions::BUILTIN_FUNCTIONS; use databend_common_io::constants::DEFAULT_BLOCK_BUFFER_SIZE; +use databend_common_meta_app::schema::VirtualField; use 
databend_common_metrics::storage::metrics_inc_block_virtual_column_write_bytes; use databend_common_metrics::storage::metrics_inc_block_virtual_column_write_milliseconds; use databend_common_metrics::storage::metrics_inc_block_virtual_column_write_nums; @@ -83,7 +84,7 @@ use opendal::Operator; pub async fn do_refresh_virtual_column( ctx: Arc, fuse_table: &FuseTable, - virtual_columns: Vec<(String, TableDataType)>, + virtual_columns: Vec, segment_locs: Option>, pipeline: &mut Pipeline, ) -> Result<()> { @@ -107,7 +108,7 @@ pub async fn do_refresh_virtual_column( if f.data_type().remove_nullable() != TableDataType::Variant { continue; } - let is_src_field = virtual_columns.iter().any(|v| v.0.starts_with(f.name())); + let is_src_field = virtual_columns.iter().any(|v| v.expr.starts_with(f.name())); if is_src_field { field_indices.push(i); } @@ -175,7 +176,7 @@ pub async fn do_refresh_virtual_column( virtual_table_schema .fields .iter() - .any(|f| *f.name() == v.0 && *f.data_type() == v.1) + .any(|f| *f.name() == v.expr && *f.data_type() == v.data_type) }) } else { false @@ -208,20 +209,23 @@ pub async fn do_refresh_virtual_column( let mut virtual_fields = Vec::with_capacity(virtual_columns.len()); let mut virtual_exprs = Vec::with_capacity(virtual_columns.len()); - for (virtual_column, virtual_type) in virtual_columns { - let mut virtual_expr = - parse_computed_expr(ctx.clone(), source_schema.clone(), &virtual_column)?; + for virtual_column_field in virtual_columns { + let mut virtual_expr = parse_computed_expr( + ctx.clone(), + source_schema.clone(), + &virtual_column_field.expr, + )?; - if virtual_type.remove_nullable() != TableDataType::Variant { + if virtual_column_field.data_type.remove_nullable() != TableDataType::Variant { virtual_expr = Expr::Cast { span: None, is_try: true, expr: Box::new(virtual_expr), - dest_type: (&virtual_type).into(), + dest_type: (&virtual_column_field.data_type).into(), } } let virtual_field = TableField::new( - &virtual_column, + 
&virtual_column_field.expr, infer_schema_type(virtual_expr.data_type())?, ); virtual_exprs.push(virtual_expr); diff --git a/src/query/ee/src/virtual_column/virtual_column_handler.rs b/src/query/ee/src/virtual_column/virtual_column_handler.rs index c6ad11019d360..0dcc71b983230 100644 --- a/src/query/ee/src/virtual_column/virtual_column_handler.rs +++ b/src/query/ee/src/virtual_column/virtual_column_handler.rs @@ -18,12 +18,12 @@ use databend_common_base::base::GlobalInstance; use databend_common_catalog::catalog::Catalog; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; -use databend_common_expression::TableDataType; use databend_common_meta_app::schema::CreateVirtualColumnReq; use databend_common_meta_app::schema::DropVirtualColumnReq; use databend_common_meta_app::schema::ListVirtualColumnsReq; use databend_common_meta_app::schema::UpdateVirtualColumnReq; use databend_common_meta_app::schema::VirtualColumnMeta; +use databend_common_meta_app::schema::VirtualField; use databend_common_pipeline_core::Pipeline; use databend_common_storages_fuse::FuseTable; use databend_enterprise_virtual_column::VirtualColumnHandler; @@ -76,7 +76,7 @@ impl VirtualColumnHandler for RealVirtualColumnHandler { &self, ctx: Arc, fuse_table: &FuseTable, - virtual_columns: Vec<(String, TableDataType)>, + virtual_columns: Vec, segment_locs: Option>, pipeline: &mut Pipeline, ) -> Result<()> { diff --git a/src/query/ee/tests/it/storages/fuse/operations/virtual_columns.rs b/src/query/ee/tests/it/storages/fuse/operations/virtual_columns.rs index 2259feaf9bf7b..149a7766f0bba 100644 --- a/src/query/ee/tests/it/storages/fuse/operations/virtual_columns.rs +++ b/src/query/ee/tests/it/storages/fuse/operations/virtual_columns.rs @@ -16,6 +16,7 @@ use databend_common_base::base::tokio; use databend_common_exception::Result; use databend_common_expression::types::NumberDataType; use databend_common_expression::TableDataType; +use 
databend_common_meta_app::schema::VirtualField; use databend_common_storage::read_parquet_schema_async_rs; use databend_common_storages_fuse::io::BlockReader; use databend_common_storages_fuse::io::MetaReaders; @@ -50,18 +51,23 @@ async fn test_fuse_do_refresh_virtual_column() -> Result<()> { let dal = fuse_table.get_operator_ref(); let virtual_columns = vec![ - ( - "v['a']".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)), - ), - ( - "v[0]".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Variant)), - ), - ( - "v['b']".to_string(), - TableDataType::Nullable(Box::new(TableDataType::Number(NumberDataType::Int64))), - ), + VirtualField { + expr: "v['a']".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None, + }, + VirtualField { + expr: "v[0]".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)), + alias_name: None, + }, + VirtualField { + expr: "v['b']".to_string(), + data_type: TableDataType::Nullable(Box::new(TableDataType::Number( + NumberDataType::Int64, + ))), + alias_name: None, + }, ]; let table_ctx = fixture.new_query_ctx().await?; diff --git a/src/query/ee_features/virtual_column/Cargo.toml b/src/query/ee_features/virtual_column/Cargo.toml index 6766f630b6c33..cd95d376abaf6 100644 --- a/src/query/ee_features/virtual_column/Cargo.toml +++ b/src/query/ee_features/virtual_column/Cargo.toml @@ -18,7 +18,6 @@ async-trait = { workspace = true } databend-common-base = { workspace = true } databend-common-catalog = { workspace = true } databend-common-exception = { workspace = true } -databend-common-expression = { workspace = true } databend-common-meta-app = { workspace = true } databend-common-pipeline-core = { workspace = true } databend-common-storages-fuse = { workspace = true } diff --git a/src/query/ee_features/virtual_column/src/virtual_column.rs b/src/query/ee_features/virtual_column/src/virtual_column.rs index 
54e123dc57d81..6a40b5cd8131e 100644 --- a/src/query/ee_features/virtual_column/src/virtual_column.rs +++ b/src/query/ee_features/virtual_column/src/virtual_column.rs @@ -18,12 +18,12 @@ use databend_common_base::base::GlobalInstance; use databend_common_catalog::catalog::Catalog; use databend_common_catalog::table_context::TableContext; use databend_common_exception::Result; -use databend_common_expression::TableDataType; use databend_common_meta_app::schema::CreateVirtualColumnReq; use databend_common_meta_app::schema::DropVirtualColumnReq; use databend_common_meta_app::schema::ListVirtualColumnsReq; use databend_common_meta_app::schema::UpdateVirtualColumnReq; use databend_common_meta_app::schema::VirtualColumnMeta; +use databend_common_meta_app::schema::VirtualField; use databend_common_pipeline_core::Pipeline; use databend_common_storages_fuse::FuseTable; use databend_storages_common_table_meta::meta::Location; @@ -58,7 +58,7 @@ pub trait VirtualColumnHandler: Sync + Send { &self, ctx: Arc, fuse_table: &FuseTable, - virtual_columns: Vec<(String, TableDataType)>, + virtual_columns: Vec, segment_locs: Option>, pipeline: &mut Pipeline, ) -> Result<()>; @@ -114,7 +114,7 @@ impl VirtualColumnHandlerWrapper { &self, ctx: Arc, fuse_table: &FuseTable, - virtual_columns: Vec<(String, TableDataType)>, + virtual_columns: Vec, segment_locs: Option>, pipeline: &mut Pipeline, ) -> Result<()> { diff --git a/src/query/service/src/interpreters/interpreter_table_modify_column.rs b/src/query/service/src/interpreters/interpreter_table_modify_column.rs index bc4cfb0d6f510..873e0719b17be 100644 --- a/src/query/service/src/interpreters/interpreter_table_modify_column.rs +++ b/src/query/service/src/interpreters/interpreter_table_modify_column.rs @@ -165,6 +165,8 @@ impl ModifyTableColumnInterpreter { let default_expr = default_expr.to_string(); new_schema.fields[i].default_expr = Some(default_expr); let _ = field_default_value(self.ctx.clone(), &new_schema.fields[i])?; + } else { + 
new_schema.fields[i].default_expr = None; } if old_field.data_type != field.data_type { // Check if this column is referenced by computed columns. diff --git a/src/query/service/src/interpreters/interpreter_virtual_column_alter.rs b/src/query/service/src/interpreters/interpreter_virtual_column_alter.rs index 5584de3c6c77d..50650a0b9db61 100644 --- a/src/query/service/src/interpreters/interpreter_virtual_column_alter.rs +++ b/src/query/service/src/interpreters/interpreter_virtual_column_alter.rs @@ -73,6 +73,7 @@ impl Interpreter for AlterVirtualColumnInterpreter { if_exists: self.plan.if_exists, name_ident: VirtualColumnIdent::new(&tenant, table_id), virtual_columns: self.plan.virtual_columns.clone(), + auto_generated: self.plan.auto_generated, }; let handler = get_virtual_column_handler(); diff --git a/src/query/service/src/interpreters/interpreter_virtual_column_create.rs b/src/query/service/src/interpreters/interpreter_virtual_column_create.rs index 5e673406a17d9..0038e1372a615 100644 --- a/src/query/service/src/interpreters/interpreter_virtual_column_create.rs +++ b/src/query/service/src/interpreters/interpreter_virtual_column_create.rs @@ -73,6 +73,7 @@ impl Interpreter for CreateVirtualColumnInterpreter { create_option: self.plan.create_option, name_ident: VirtualColumnIdent::new(tenant, table_id), virtual_columns: self.plan.virtual_columns.clone(), + auto_generated: self.plan.auto_generated, }; let handler = get_virtual_column_handler(); diff --git a/src/query/service/tests/it/pipelines/filter/random_filter_expr.rs b/src/query/service/tests/it/pipelines/filter/random_filter_expr.rs index 8c01ab040a004..e53100877493e 100644 --- a/src/query/service/tests/it/pipelines/filter/random_filter_expr.rs +++ b/src/query/service/tests/it/pipelines/filter/random_filter_expr.rs @@ -151,7 +151,7 @@ fn convert_predicate_tree_to_scalar_expr(node: PredicateNode, data_type: &DataTy index: 0, data_type: Box::new(data_type.clone()), visibility: Visibility::Visible, - 
virtual_computed_expr: None, + virtual_expr: None, }; let scalar_expr = ScalarExpr::BoundColumnRef(BoundColumnRef { span: None, column }); ScalarExpr::FunctionCall(FunctionCall { diff --git a/src/query/sql/src/planner/binder/bind_context.rs b/src/query/sql/src/planner/binder/bind_context.rs index b52c3a733aa80..4acf198825e38 100644 --- a/src/query/sql/src/planner/binder/bind_context.rs +++ b/src/query/sql/src/planner/binder/bind_context.rs @@ -123,6 +123,21 @@ pub struct VirtualColumnContext { /// The is used to generate virtual column id for virtual columns. /// Not a real column id, only used to identify a virtual column. pub next_column_ids: HashMap, + /// virtual column alias names + pub virtual_columns: Vec, +} + +impl VirtualColumnContext { + fn with_parent(parent: &VirtualColumnContext) -> VirtualColumnContext { + VirtualColumnContext { + allow_pushdown: parent.allow_pushdown, + table_indices: HashSet::new(), + virtual_column_indices: HashMap::new(), + virtual_column_names: HashMap::new(), + next_column_ids: HashMap::new(), + virtual_columns: Vec::new(), + } + } } /// `BindContext` stores all the free variables in a query and tracks the context of binding procedure. 
@@ -252,7 +267,9 @@ impl BindContext { have_udf_script: false, have_udf_server: false, inverted_index_map: Box::default(), - virtual_column_context: Default::default(), + virtual_column_context: VirtualColumnContext::with_parent( + &parent.virtual_column_context, + ), expr_context: ExprContext::default(), planning_agg_index: false, window_definitions: DashMap::new(), @@ -428,6 +445,17 @@ impl BindContext { return; } + // look up virtual column alias names + for column_binding in bind_context.virtual_column_context.virtual_columns.iter() { + if Self::match_column_binding(database, table, column, column_binding) { + result.push(NameResolutionResult::Column(column_binding.clone())); + } + } + + if !result.is_empty() { + return; + } + if let Some(ref parent) = bind_context.parent { bind_context = parent; } else { diff --git a/src/query/sql/src/planner/binder/bind_query/bind_select.rs b/src/query/sql/src/planner/binder/bind_query/bind_select.rs index 3f35bbec56468..0314b71279d5a 100644 --- a/src/query/sql/src/planner/binder/bind_query/bind_select.rs +++ b/src/query/sql/src/planner/binder/bind_query/bind_select.rs @@ -73,6 +73,12 @@ impl Binder { } } + // whether allow rewrite virtual column and pushdown + let allow_pushdown = LicenseManagerSwitch::instance() + .check_enterprise_enabled(self.ctx.get_license_key(), Feature::VirtualColumn) + .is_ok(); + bind_context.virtual_column_context.allow_pushdown = allow_pushdown; + let (mut s_expr, mut from_context) = if stmt.from.is_empty() { let select_list = &stmt.select_list; self.bind_dummy_table(bind_context, select_list)? @@ -100,12 +106,6 @@ impl Binder { self.bind_table_reference(bind_context, &cross_joins)? 
}; - // whether allow rewrite virtual column and pushdown - let allow_pushdown = LicenseManagerSwitch::instance() - .check_enterprise_enabled(self.ctx.get_license_key(), Feature::VirtualColumn) - .is_ok(); - from_context.virtual_column_context.allow_pushdown = allow_pushdown; - let mut rewriter = SelectRewriter::new( from_context.all_column_bindings(), self.name_resolution_ctx.unquoted_ident_case_sensitive, diff --git a/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs b/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs index 8dfaa916b411e..1e36d39c25758 100644 --- a/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs +++ b/src/query/sql/src/planner/binder/bind_table_reference/bind_table.rs @@ -12,6 +12,9 @@ // See the License for the specific language governing permissions and // limitations under the License. +use std::collections::HashMap; +use std::sync::Arc; + use databend_common_ast::ast::Identifier; use databend_common_ast::ast::SampleConfig; use databend_common_ast::ast::Statement; @@ -21,12 +24,17 @@ use databend_common_ast::ast::WithOptions; use databend_common_ast::parser::parse_sql; use databend_common_ast::parser::tokenize_sql; use databend_common_ast::Span; +use databend_common_catalog::table::Table; use databend_common_catalog::table::TimeNavigation; use databend_common_catalog::table_with_options::check_with_opt_valid; use databend_common_catalog::table_with_options::get_with_opt_consume; use databend_common_catalog::table_with_options::get_with_opt_max_batch_size; use databend_common_exception::ErrorCode; use databend_common_exception::Result; +use databend_common_expression::types::DataType; +use databend_common_meta_app::schema::ListVirtualColumnsReq; +use databend_common_meta_app::schema::VirtualField; +use databend_common_meta_types::MetaId; use databend_common_storages_view::view_table::QUERY; use databend_storages_common_table_meta::table::get_change_type; @@ -34,6 +42,9 @@ use 
crate::binder::util::TableIdentifier; use crate::binder::Binder; use crate::optimizer::SExpr; use crate::BindContext; +use crate::ColumnBindingBuilder; +use crate::IndexType; +use crate::Visibility; impl Binder { /// Bind a base table. @@ -271,9 +282,9 @@ impl Binder { } _ => { let table_index = self.metadata.write().add_table( - catalog, + catalog.clone(), database.clone(), - table_meta, + table_meta.clone(), table_name_alias, bind_context.view_info.is_some(), bind_context.planning_agg_index, @@ -291,6 +302,16 @@ impl Binder { if let Some(alias) = alias { bind_context.apply_table_alias(alias, &self.name_resolution_ctx)?; } + + self.bind_table_virtual_column( + &mut bind_context, + table_meta.clone(), + table_index, + catalog.as_str(), + database.as_str(), + table_name.as_str(), + )?; + Ok((s_expr, bind_context)) } } @@ -318,4 +339,96 @@ impl Binder { _ => Ok(()), } } + + fn bind_table_virtual_column( + &mut self, + bind_context: &mut BindContext, + table_meta: Arc, + table_index: IndexType, + catalog_name: &str, + database_name: &str, + table_name: &str, + ) -> Result<()> { + if !bind_context.virtual_column_context.allow_pushdown { + return Ok(()); + } + // Ignore tables that do not support virtual columns + if !table_meta.support_virtual_columns() { + return Ok(()); + } + + // If the table creates virtual columns, add the information to the context, + // the matched variant path expression will be converted to a virtual column + // and pushed down to the storage layer for reading to speed up the query. 
+ let schema = table_meta.schema(); + if !bind_context + .virtual_column_context + .table_indices + .contains(&table_index) + { + let table_id = table_meta.get_id(); + let virtual_column_fields = databend_common_base::runtime::block_on( + self.get_virtual_columns(catalog_name, table_id), + )?; + bind_context + .virtual_column_context + .table_indices + .insert(table_index); + if let Some(virtual_column_fields) = virtual_column_fields { + let mut virtual_column_name_map = + HashMap::with_capacity(virtual_column_fields.len()); + for virtual_field in virtual_column_fields.into_iter() { + // Add optional virtual column alias names. + // Don't need to set the column index, as the virtual expr + // will be parsed as an expression and then bind to virtual column. + if let Some(alias_name) = virtual_field.alias_name { + let virtual_column_binding = ColumnBindingBuilder::new( + alias_name.clone(), + 0, + Box::new(DataType::from(&virtual_field.data_type)), + Visibility::InVisible, + ) + .database_name(Some(database_name.to_string())) + .table_name(Some(table_name.to_string())) + .table_index(Some(table_index)) + .virtual_expr(Some(virtual_field.expr.clone())) + .build(); + + bind_context + .virtual_column_context + .virtual_columns + .push(virtual_column_binding); + } + virtual_column_name_map.insert(virtual_field.expr, virtual_field.data_type); + } + bind_context + .virtual_column_context + .virtual_column_names + .insert(table_index, virtual_column_name_map); + bind_context + .virtual_column_context + .next_column_ids + .insert(table_index, schema.next_column_id); + } + } + + Ok(()) + } + + async fn get_virtual_columns( + &self, + catalog_name: &str, + table_id: MetaId, + ) -> Result>> { + let tenant = self.ctx.get_tenant(); + let catalog = self.ctx.get_catalog(catalog_name).await?; + let req = ListVirtualColumnsReq::new(tenant, Some(table_id)); + + if let Ok(virtual_column_metas) = catalog.list_virtual_columns(req).await { + if !virtual_column_metas.is_empty() { + return 
Ok(Some(virtual_column_metas[0].virtual_columns.clone())); + } + } + Ok(None) + } } diff --git a/src/query/sql/src/planner/binder/column_binding.rs b/src/query/sql/src/planner/binder/column_binding.rs index 21e4562ebeb86..bbecaefbcbf30 100644 --- a/src/query/sql/src/planner/binder/column_binding.rs +++ b/src/query/sql/src/planner/binder/column_binding.rs @@ -36,8 +36,9 @@ pub struct ColumnBinding { pub data_type: Box, pub visibility: Visibility, - - pub virtual_computed_expr: Option, + // Opitonal virtual expr, used by virtual computed column and variant virtual column, + // `virtual_expr` will be parsed and bind to a `ScalarExpr`. + pub virtual_expr: Option, } const DUMMY_INDEX: usize = usize::MAX; @@ -75,7 +76,7 @@ impl ColumnBinding { index, data_type, visibility: Visibility::Visible, - virtual_computed_expr: None, + virtual_expr: None, } } @@ -104,7 +105,7 @@ pub struct ColumnBindingBuilder { pub visibility: Visibility, - pub virtual_computed_expr: Option, + pub virtual_expr: Option, } impl ColumnBindingBuilder { @@ -123,7 +124,7 @@ impl ColumnBindingBuilder { index, data_type, visibility, - virtual_computed_expr: None, + virtual_expr: None, } } @@ -147,8 +148,8 @@ impl ColumnBindingBuilder { self } - pub fn virtual_computed_expr(mut self, vir: Option) -> ColumnBindingBuilder { - self.virtual_computed_expr = vir; + pub fn virtual_expr(mut self, virtual_expr: Option) -> ColumnBindingBuilder { + self.virtual_expr = virtual_expr; self } @@ -162,7 +163,7 @@ impl ColumnBindingBuilder { index: self.index, data_type: self.data_type, visibility: self.visibility, - virtual_computed_expr: self.virtual_computed_expr, + virtual_expr: self.virtual_expr, } } } diff --git a/src/query/sql/src/planner/binder/ddl/virtual_column.rs b/src/query/sql/src/planner/binder/ddl/virtual_column.rs index 1b21ac9f52b36..02386502a8d0d 100644 --- a/src/query/sql/src/planner/binder/ddl/virtual_column.rs +++ b/src/query/sql/src/planner/binder/ddl/virtual_column.rs @@ -12,7 +12,8 @@ // See the 
License for the specific language governing permissions and // limitations under the License. -use std::collections::HashMap; +use std::collections::BTreeMap; +use std::collections::HashSet; use std::collections::VecDeque; use std::mem; @@ -25,6 +26,7 @@ use databend_common_ast::ast::MapAccessor; use databend_common_ast::ast::RefreshVirtualColumnStmt; use databend_common_ast::ast::ShowLimit; use databend_common_ast::ast::ShowVirtualColumnsStmt; +use databend_common_ast::ast::VirtualColumn; use databend_common_exception::ErrorCode; use databend_common_exception::Result; use databend_common_expression::type_check::get_simple_cast_function; @@ -32,6 +34,7 @@ use databend_common_expression::types::DataType; use databend_common_expression::TableDataType; use databend_common_expression::TableSchemaRef; use databend_common_meta_app::schema::ListVirtualColumnsReq; +use databend_common_meta_app::schema::VirtualField; use log::debug; use crate::binder::Binder; @@ -90,6 +93,7 @@ impl Binder { database, table, virtual_columns, + auto_generated: false, }, ))) } @@ -128,6 +132,7 @@ impl Binder { database, table, virtual_columns, + auto_generated: false, }))) } @@ -201,20 +206,21 @@ impl Binder { #[async_backtrace::framed] async fn analyze_virtual_columns( &mut self, - virtual_columns: &[Expr], + virtual_columns: &[VirtualColumn], schema: TableSchemaRef, - ) -> Result> { - let mut virtual_names = HashMap::with_capacity(virtual_columns.len()); + ) -> Result> { + let mut alias_name_set = HashSet::new(); + let mut virtual_field_map = BTreeMap::new(); for virtual_column in virtual_columns.iter() { let mut typ = None; - let mut expr = virtual_column; + let mut expr = *virtual_column.expr.clone(); match expr { Expr::Cast { expr: inner_expr, target_type, .. } => { - expr = inner_expr; + expr = *inner_expr.clone(); typ = Some(target_type); } Expr::TryCast { @@ -222,7 +228,7 @@ impl Binder { target_type, .. 
} => { - expr = inner_expr; + expr = *inner_expr.clone(); typ = Some(target_type); } _ => {} @@ -234,13 +240,13 @@ impl Binder { .. } = expr { - expr = &**inner_expr; + expr = *inner_expr; let path = match accessor { MapAccessor::Bracket { key: box Expr::Literal { value, .. }, } => value.clone(), MapAccessor::Colon { key } => Literal::String(key.name.clone()), - MapAccessor::DotNumber { key } => Literal::UInt64(*key), + MapAccessor::DotNumber { key } => Literal::UInt64(key), _ => { return Err(ErrorCode::SemanticError(format!( "Unsupported accessor: {:?}", @@ -255,7 +261,7 @@ impl Binder { "Virtual Column should be a inner field of Variant Column", )); } - if let Expr::ColumnRef { column, .. } = expr { + if let Expr::ColumnRef { ref column, .. } = expr { if let Ok(field) = schema.field_with_name(column.column.name()) { if field.data_type().remove_nullable() != TableDataType::Variant { return Err(ErrorCode::SemanticError( @@ -281,7 +287,7 @@ impl Binder { } let data_type = if let Some(typ) = typ { - let data_type = resolve_type_name(typ, false)?; + let data_type = resolve_type_name(&typ, false)?; let dest_type = DataType::from(&data_type.remove_nullable()); let cast_func_name = get_simple_cast_function(true, &DataType::Variant, &dest_type); @@ -296,13 +302,39 @@ impl Binder { TableDataType::Nullable(Box::new(TableDataType::Variant)) }; - if virtual_names.contains_key(&virtual_name) { + if virtual_field_map.contains_key(&virtual_name) { return Err(ErrorCode::SemanticError(format!( "Duplicate virtual column: {}", virtual_name ))); } - virtual_names.insert(virtual_name, data_type); + + let alias_name = virtual_column + .alias + .as_ref() + .map(|ident| self.normalize_identifier(ident).name.clone()); + + if let Some(alias_name) = &alias_name { + if schema.field_with_name(alias_name).is_ok() { + return Err(ErrorCode::SemanticError(format!( + "Virtual column alias name {} conflict with table field", + alias_name + ))); + } + if alias_name_set.contains(alias_name) { + 
return Err(ErrorCode::SemanticError(format!( + "Virtual column alias name {} duplicate", + alias_name + ))); + } + alias_name_set.insert(alias_name.clone()); + } + let virtual_field = VirtualField { + expr: virtual_name.clone(), + data_type, + alias_name: alias_name.clone(), + }; + virtual_field_map.insert(virtual_name, virtual_field); } else { return Err(ErrorCode::SemanticError(format!( "Column is not exist: {:?}", @@ -316,9 +348,8 @@ impl Binder { ))); } } - let mut virtual_columns: Vec<_> = virtual_names.into_iter().collect(); - virtual_columns.sort_by(|lv, rv| lv.0.cmp(&rv.0)); - Ok(virtual_columns) + let virtual_fields = virtual_field_map.into_values().collect::>(); + Ok(virtual_fields) } #[async_backtrace::framed] diff --git a/src/query/sql/src/planner/binder/project.rs b/src/query/sql/src/planner/binder/project.rs index 0210567367c80..f17c9fb56a8fc 100644 --- a/src/query/sql/src/planner/binder/project.rs +++ b/src/query/sql/src/planner/binder/project.rs @@ -311,8 +311,8 @@ impl Binder { select_target: &'a SelectTarget, column_binding: ColumnBinding, ) -> Result> { - let scalar = match column_binding.virtual_computed_expr { - Some(virtual_computed_expr) => { + let scalar = match column_binding.virtual_expr { + Some(virtual_expr) => { let mut input_context = input_context.clone(); let mut scalar_binder = ScalarBinder::new( &mut input_context, @@ -321,7 +321,7 @@ impl Binder { self.metadata.clone(), &[], ); - let sql_tokens = tokenize_sql(virtual_computed_expr.as_str())?; + let sql_tokens = tokenize_sql(virtual_expr.as_str())?; let expr = parse_expr(&sql_tokens, self.dialect)?; let (scalar, _) = scalar_binder.bind(&expr)?; diff --git a/src/query/sql/src/planner/binder/table.rs b/src/query/sql/src/planner/binder/table.rs index 2511596844baa..ccc6c7601d5f0 100644 --- a/src/query/sql/src/planner/binder/table.rs +++ b/src/query/sql/src/planner/binder/table.rs @@ -373,7 +373,7 @@ impl Binder { data_type, table_index, column_position, - virtual_computed_expr, + 
virtual_expr, .. }) => { let column_binding = ColumnBindingBuilder::new( @@ -390,7 +390,7 @@ impl Binder { .database_name(Some(database_name.to_string())) .table_index(Some(*table_index)) .column_position(*column_position) - .virtual_computed_expr(virtual_computed_expr.clone()) + .virtual_expr(virtual_expr.clone()) .build(); bind_context.add_column_binding(column_binding); base_column_scan_id.insert(*column_index, scan_id); diff --git a/src/query/sql/src/planner/expression_parser.rs b/src/query/sql/src/planner/expression_parser.rs index ad8c6e93eeef1..e5ba4a9ef159c 100644 --- a/src/query/sql/src/planner/expression_parser.rs +++ b/src/query/sql/src/planner/expression_parser.rs @@ -79,7 +79,7 @@ pub fn bind_table(table_meta: Arc) -> Result<(BindContext, MetadataRe column_name, data_type, path_indices, - virtual_computed_expr, + virtual_expr, .. }) => { let visibility = if path_indices.is_some() { @@ -96,7 +96,7 @@ pub fn bind_table(table_meta: Arc) -> Result<(BindContext, MetadataRe .database_name(Some("default".to_string())) .table_name(Some(table.name().to_string())) .table_index(Some(table.index())) - .virtual_computed_expr(virtual_computed_expr.clone()) + .virtual_expr(virtual_expr.clone()) .build() } _ => { diff --git a/src/query/sql/src/planner/metadata.rs b/src/query/sql/src/planner/metadata.rs index d3a84062b70cf..f775e2374eb27 100644 --- a/src/query/sql/src/planner/metadata.rs +++ b/src/query/sql/src/planner/metadata.rs @@ -240,7 +240,7 @@ impl Metadata { path_indices: Option>, column_id: Option, column_position: Option, - virtual_computed_expr: Option, + virtual_expr: Option, ) -> IndexType { let column_index = self.columns.len(); let column_entry = ColumnEntry::BaseTableColumn(BaseTableColumn { @@ -251,7 +251,7 @@ impl Metadata { table_index, path_indices, column_id, - virtual_computed_expr, + virtual_expr, }); self.columns.push(column_entry); column_index @@ -613,7 +613,7 @@ pub struct BaseTableColumn { /// The column id in table schema. 
pub column_id: Option, /// Virtual computed expression, generated in query. - pub virtual_computed_expr: Option, + pub virtual_expr: Option, } #[derive(Clone, Debug)] diff --git a/src/query/sql/src/planner/optimizer/aggregate/normalize_aggregate.rs b/src/query/sql/src/planner/optimizer/aggregate/normalize_aggregate.rs index bad2a93f5dd66..a9bf61c1a0d5b 100644 --- a/src/query/sql/src/planner/optimizer/aggregate/normalize_aggregate.rs +++ b/src/query/sql/src/planner/optimizer/aggregate/normalize_aggregate.rs @@ -145,7 +145,7 @@ impl RuleNormalizeAggregateOptimizer { database_name: None, column_position: None, index: work_index, - virtual_computed_expr: None, + virtual_expr: None, data_type: work_c.return_type.clone(), visibility: Visibility::Visible, column_name: work_c.display_name.clone(), diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_scan.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_scan.rs index 0e159989974ac..5b38b085e23c2 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_scan.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_scan.rs @@ -104,7 +104,7 @@ impl RulePushDownFilterScan { if self.replace_view { column_binding_builder = column_binding_builder - .virtual_computed_expr(column.column.virtual_computed_expr.clone()); + .virtual_expr(column.column.virtual_expr.clone()); } column.column = column_binding_builder.build(); diff --git a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_union.rs b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_union.rs index 2701b85446fa9..bafd4aa547364 100644 --- a/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_union.rs +++ b/src/query/sql/src/planner/optimizer/rule/rewrite/rule_push_down_filter_union.rs @@ -142,7 +142,7 @@ fn replace_column_binding( column.column.data_type.clone(), Visibility::Visible, ) - 
.virtual_computed_expr(column.column.virtual_computed_expr.clone()) + .virtual_expr(column.column.virtual_expr.clone()) .build(); column.column = new_column; } diff --git a/src/query/sql/src/planner/optimizer/statistics/collect_statistics.rs b/src/query/sql/src/planner/optimizer/statistics/collect_statistics.rs index f48b100e6ebe2..b93fc91a3ad69 100644 --- a/src/query/sql/src/planner/optimizer/statistics/collect_statistics.rs +++ b/src/query/sql/src/planner/optimizer/statistics/collect_statistics.rs @@ -75,11 +75,11 @@ impl CollectStatisticsOptimizer { if let ColumnEntry::BaseTableColumn(BaseTableColumn { column_index, column_id, - virtual_computed_expr, + virtual_expr, .. }) = column { - if virtual_computed_expr.is_none() { + if virtual_expr.is_none() { if let Some(column_id) = *column_id { let col_stat = column_statistics_provider .column_statistics(column_id as ColumnId); diff --git a/src/query/sql/src/planner/plans/ddl/virtual_column.rs b/src/query/sql/src/planner/plans/ddl/virtual_column.rs index 7ac990ed780e9..e5e7cbb6379fb 100644 --- a/src/query/sql/src/planner/plans/ddl/virtual_column.rs +++ b/src/query/sql/src/planner/plans/ddl/virtual_column.rs @@ -16,8 +16,8 @@ use std::sync::Arc; use databend_common_expression::DataSchema; use databend_common_expression::DataSchemaRef; -use databend_common_expression::TableDataType; use databend_common_meta_app::schema::CreateOption; +use databend_common_meta_app::schema::VirtualField; use databend_storages_common_table_meta::meta::Location; #[derive(Clone, Debug, PartialEq, Eq)] @@ -26,7 +26,8 @@ pub struct CreateVirtualColumnPlan { pub catalog: String, pub database: String, pub table: String, - pub virtual_columns: Vec<(String, TableDataType)>, + pub virtual_columns: Vec, + pub auto_generated: bool, } impl CreateVirtualColumnPlan { @@ -41,7 +42,8 @@ pub struct AlterVirtualColumnPlan { pub catalog: String, pub database: String, pub table: String, - pub virtual_columns: Vec<(String, TableDataType)>, + pub 
virtual_columns: Vec, + pub auto_generated: bool, } impl AlterVirtualColumnPlan { @@ -69,7 +71,7 @@ pub struct RefreshVirtualColumnPlan { pub catalog: String, pub database: String, pub table: String, - pub virtual_columns: Vec<(String, TableDataType)>, + pub virtual_columns: Vec, pub segment_locs: Option>, } diff --git a/src/query/sql/src/planner/semantic/type_check.rs b/src/query/sql/src/planner/semantic/type_check.rs index 991c5e10c870a..e128f3a89be9c 100644 --- a/src/query/sql/src/planner/semantic/type_check.rs +++ b/src/query/sql/src/planner/semantic/type_check.rs @@ -52,7 +52,6 @@ use databend_common_catalog::plan::InternalColumn; use databend_common_catalog::plan::InternalColumnType; use databend_common_catalog::plan::InvertedIndexInfo; use databend_common_catalog::plan::InvertedIndexOption; -use databend_common_catalog::table::Table; use databend_common_catalog::table_context::TableContext; use databend_common_compress::CompressAlgorithm; use databend_common_compress::DecompressDecoder; @@ -100,7 +99,6 @@ use databend_common_meta_app::principal::UDFServer; use databend_common_meta_app::schema::dictionary_name_ident::DictionaryNameIdent; use databend_common_meta_app::schema::DictionaryIdentity; use databend_common_meta_app::schema::GetSequenceReq; -use databend_common_meta_app::schema::ListVirtualColumnsReq; use databend_common_meta_app::schema::SequenceIdent; use databend_common_storage::init_stage_operator; use databend_common_users::UserApiProvider; @@ -169,7 +167,6 @@ use crate::ColumnBinding; use crate::ColumnBindingBuilder; use crate::ColumnEntry; use crate::MetadataRef; -use crate::TableEntry; use crate::Visibility; /// A helper for type checking. 
@@ -274,8 +271,8 @@ impl<'a> TypeChecker<'a> { let (scalar, data_type) = match result { NameResolutionResult::Column(column) => { - if let Some(virtual_computed_expr) = column.virtual_computed_expr { - let sql_tokens = tokenize_sql(virtual_computed_expr.as_str())?; + if let Some(virtual_expr) = column.virtual_expr { + let sql_tokens = tokenize_sql(virtual_expr.as_str())?; let expr = parse_expr(&sql_tokens, self.dialect)?; return self.resolve(&expr); } else { @@ -297,21 +294,15 @@ impl<'a> TypeChecker<'a> { self.metadata.clone(), true, )?; - if let Some(virtual_computed_expr) = column.virtual_computed_expr { - let sql_tokens = tokenize_sql(virtual_computed_expr.as_str())?; - let expr = parse_expr(&sql_tokens, self.dialect)?; - return self.resolve(&expr); - } else { - let data_type = *column.data_type.clone(); - ( - BoundColumnRef { - span: *span, - column, - } - .into(), - data_type, - ) - } + let data_type = *column.data_type.clone(); + ( + BoundColumnRef { + span: *span, + column, + } + .into(), + data_type, + ) } NameResolutionResult::Alias { scalar, .. } => { (scalar.clone(), scalar.data_type()?) 
@@ -4668,28 +4659,6 @@ impl<'a> TypeChecker<'a> { Ok(Box::new((subquery_expr.into(), data_type))) } - async fn get_virtual_columns( - &self, - table_entry: &TableEntry, - table: Arc, - ) -> Result>> { - let table_id = table.get_id(); - let req = ListVirtualColumnsReq::new(self.ctx.get_tenant(), Some(table_id)); - let catalog = self.ctx.get_catalog(table_entry.catalog()).await?; - - if let Ok(virtual_column_metas) = catalog.list_virtual_columns(req).await { - if !virtual_column_metas.is_empty() { - let mut virtual_column_name_map = - HashMap::with_capacity(virtual_column_metas[0].virtual_columns.len()); - for (name, typ) in virtual_column_metas[0].virtual_columns.iter() { - virtual_column_name_map.insert(name.clone(), typ.clone()); - } - return Ok(Some(virtual_column_name_map)); - } - } - Ok(None) - } - fn try_rewrite_virtual_column( &mut self, base_column: &BaseTableColumn, @@ -4698,42 +4667,6 @@ impl<'a> TypeChecker<'a> { if !self.bind_context.virtual_column_context.allow_pushdown { return Ok(None); } - - let metadata = self.metadata.read().clone(); - let table_entry = metadata.table(base_column.table_index); - - let table = table_entry.table(); - // Ignore tables that do not support virtual columns - if !table.support_virtual_columns() { - return Ok(None); - } - let schema = table.schema(); - - if !self - .bind_context - .virtual_column_context - .table_indices - .contains(&base_column.table_index) - { - let virtual_column_name_map = databend_common_base::runtime::block_on( - self.get_virtual_columns(table_entry, table), - )?; - self.bind_context - .virtual_column_context - .table_indices - .insert(base_column.table_index); - if let Some(virtual_column_name_map) = virtual_column_name_map { - self.bind_context - .virtual_column_context - .virtual_column_names - .insert(base_column.table_index, virtual_column_name_map); - self.bind_context - .virtual_column_context - .next_column_ids - .insert(base_column.table_index, schema.next_column_id); - } - } - if let 
Some(virtual_column_name_map) = self .bind_context .virtual_column_context @@ -4762,7 +4695,11 @@ impl<'a> TypeChecker<'a> { let mut index = 0; // Check for duplicate virtual columns - for table_column in metadata.virtual_columns_by_table_index(base_column.table_index) { + for table_column in self + .metadata + .read() + .virtual_columns_by_table_index(base_column.table_index) + { if table_column.name() == name { index = table_column.index(); break; diff --git a/src/query/storages/system/src/virtual_columns_table.rs b/src/query/storages/system/src/virtual_columns_table.rs index a827e96115882..8a69c2018e7a0 100644 --- a/src/query/storages/system/src/virtual_columns_table.rs +++ b/src/query/storages/system/src/virtual_columns_table.rs @@ -84,11 +84,22 @@ impl AsyncSystemTable for VirtualColumnsTable { virtual_column_meta .virtual_columns .iter() - .map(|(name, ty)| { - if ty.remove_nullable() == TableDataType::Variant { - name.to_string() + .map(|virtual_field| { + let virtual_expr = if virtual_field.data_type.remove_nullable() + == TableDataType::Variant + { + virtual_field.expr.to_string() } else { - format!("{}::{}", name, ty.remove_nullable()) + format!( + "{}::{}", + virtual_field.expr, + virtual_field.data_type.remove_nullable() + ) + }; + if let Some(alias_name) = &virtual_field.alias_name { + format!("{} AS {}", virtual_expr, alias_name) + } else { + virtual_expr } }) .join(", "), diff --git a/tests/sqllogictests/suites/ee/05_ee_ddl/05_0002_ddl_create_drop_virtual_columns.test b/tests/sqllogictests/suites/ee/05_ee_ddl/05_0002_ddl_create_drop_virtual_columns.test index 77dcc2e5737cb..a0e6a8449fdf1 100644 --- a/tests/sqllogictests/suites/ee/05_ee_ddl/05_0002_ddl_create_drop_virtual_columns.test +++ b/tests/sqllogictests/suites/ee/05_ee_ddl/05_0002_ddl_create_drop_virtual_columns.test @@ -45,17 +45,28 @@ CREATE VIRTUAL COLUMN (v) FOR t1; statement error 1065 CREATE VIRTUAL COLUMN (a['k1']) FOR t1; +statement error 1065 +CREATE VIRTUAL COLUMN (v['k1'] as a) FOR 
t1; + +statement error 1065 +CREATE VIRTUAL COLUMN (v['k1'] as v1, v['k2'] as v1) FOR t1; + statement ok -CREATE VIRTUAL COLUMN (v['k1'], v:k2, v[0]) FOR t1; +CREATE VIRTUAL COLUMN (v['k1'] as v1, v:k2 as v2, v[0] as v3) FOR t1; statement ok -CREATE VIRTUAL COLUMN IF NOT EXISTS (v['k1'], v:k2, v[0]) FOR t1; +CREATE VIRTUAL COLUMN IF NOT EXISTS (v['k1'] as v1, v:k2 as v2, v[0] as v3) FOR t1; statement error 1116 CREATE VIRTUAL COLUMN (v['k1'], v:k2, v[0]) FOR t1; statement ok -ALTER VIRTUAL COLUMN (v['k1']['k2'], v[1][2], v[3]) FOR t1; +ALTER VIRTUAL COLUMN (v['k1']['k2'] as v1, v[1][2] as v2, v[3] as v3) FOR t1; + +query TTT +SHOW VIRTUAL COLUMNS WHERE database='test_virtual_db' AND table='t1'; +---- +test_virtual_db t1 v['k1']['k2'] AS v1, v[1][2] AS v2, v[3] AS v3 statement ok REFRESH VIRTUAL COLUMN FOR t1; diff --git a/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test b/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test index 4c1d6683bfc22..00e868cb1fb82 100644 --- a/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test +++ b/tests/sqllogictests/suites/mode/standalone/ee/explain_virtual_column.test @@ -67,7 +67,7 @@ EvalScalar └── estimated rows: 0.20 statement ok -create virtual column (v['a'][0], v['b']) for t1 +create virtual column (v['a'][0] as v1, v['b'] as v2) for t1 statement ok refresh virtual column for t1 @@ -87,6 +87,21 @@ TableScan ├── virtual columns: [v['a'][0], v['b']] └── estimated rows: 1.00 +query T +explain select a, v1, v2 from t1 +---- +TableScan +├── table: default.test_virtual_db.t1 +├── output columns: [a (#0), v['a'][0] (#2), v['b'] (#3)] +├── read rows: 1 +├── read size: < 1 KiB +├── partitions total: 1 +├── partitions scanned: 1 +├── pruning stats: [segments: , blocks: ] +├── push downs: [filters: [], limit: NONE] +├── virtual columns: [v['a'][0], v['b']] +└── estimated rows: 1.00 + query T explain select a, v['a'][0], v['b'][1] from t1 ---- @@ -130,7 +145,6 @@ create 
table t2 (a int null, v json null) storage_format = 'parquet' statement ok insert into t2 values(1, parse_json('{"a":[1,2,3],"b":{"c":10}}')) - query T explain select a, v['a'][0], v['b'] from t2 ---- @@ -172,7 +186,7 @@ EvalScalar └── estimated rows: 1.00 statement ok -create virtual column (v['a'][0], v['b']) for t2 +create virtual column (v['a'][0] as v1, v['b'] as v2) for t2 statement ok refresh virtual column for t2 @@ -192,6 +206,21 @@ TableScan ├── virtual columns: [v['a'][0], v['b']] └── estimated rows: 1.00 +query T +explain select a, v1, v2 from t2 +---- +TableScan +├── table: default.test_virtual_db.t2 +├── output columns: [a (#0), v['a'][0] (#2), v['b'] (#3)] +├── read rows: 1 +├── read size: < 1 KiB +├── partitions total: 1 +├── partitions scanned: 1 +├── pruning stats: [segments: , blocks: ] +├── push downs: [filters: [], limit: NONE] +├── virtual columns: [v['a'][0], v['b']] +└── estimated rows: 1.00 + query T explain select a, v['a'][0], v['b'][1] from t2 ----