Skip to content

Commit

Permalink
feat(query): Virtual column support alias name (#17365)
Browse files Browse the repository at this point in the history
* feat(query): Virtual column support alias name

* fix

* fix machete

* add tests

* fix tests

* fix comments

* fix
  • Loading branch information
b41sh authored Jan 27, 2025
1 parent 3bb9e91 commit c5e7355
Show file tree
Hide file tree
Showing 43 changed files with 1,082 additions and 610 deletions.
1 change: 0 additions & 1 deletion Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions src/meta/api/src/schema_api_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,7 @@ impl<KV: kvapi::KVApi<Error = MetaError> + ?Sized> SchemaApi for KV {
virtual_columns: req.virtual_columns.clone(),
created_on: Utc::now(),
updated_on: None,
auto_generated: req.auto_generated,
};

self.insert_name_value_with_create_option(
Expand Down Expand Up @@ -963,6 +964,7 @@ impl<KV: kvapi::KVApi<Error = MetaError> + ?Sized> SchemaApi for KV {
|mut meta| {
meta.virtual_columns = req.virtual_columns.clone();
meta.updated_on = Some(Utc::now());
meta.auto_generated = req.auto_generated;
Some((meta, None))
},
not_found,
Expand Down
331 changes: 187 additions & 144 deletions src/meta/api/src/schema_api_test_suite.rs

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions src/meta/app/src/schema/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -144,4 +144,5 @@ pub use virtual_column::DropVirtualColumnReq;
pub use virtual_column::ListVirtualColumnsReq;
pub use virtual_column::UpdateVirtualColumnReq;
pub use virtual_column::VirtualColumnMeta;
pub use virtual_column::VirtualField;
pub use virtual_column_ident::VirtualColumnIdent;
51 changes: 47 additions & 4 deletions src/meta/app/src/schema/virtual_column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,62 @@ use crate::schema::virtual_column_ident::VirtualColumnIdent;
use crate::tenant::Tenant;
use crate::tenant::ToTenant;

#[derive(serde::Serialize, serde::Deserialize, Clone, Debug, Eq, PartialEq)]
// Definition of one virtual field column extracted from a Variant column.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct VirtualField {
// Expression that extracts the internal virtual field from the variant value,
// for example:
// `data['key']`, `data[0]`, `data['key1']['key2']`, ..
pub expr: String,
// The data type of the internal virtual field.
// If all the rows of a virtual field have the same type,
// the virtual field can be cast to that type.
pub data_type: TableDataType,
// Optional alias name; `None` when no alias is set for this field.
pub alias_name: Option<String>,
}

impl Display for VirtualField {
    /// Renders the field as `<expr>::<type>`, followed by ` AS <alias>` when
    /// an alias name is set. The type is printed with its nullable wrapper
    /// removed (`remove_nullable`).
    fn fmt(&self, f: &mut Formatter) -> fmt::Result {
        // The `expr::type` prefix is common to both the aliased and the
        // un-aliased form, so it is emitted once up front.
        write!(f, "{}::{}", self.expr, self.data_type.remove_nullable())?;
        match &self.alias_name {
            Some(alias_name) => write!(f, " AS {}", alias_name),
            None => Ok(()),
        }
    }
}

// Metadata record describing the virtual columns of the table `table_id`.
// NOTE(review): this listing carries two `virtual_columns` declarations (the
// tuple form and the `VirtualField` form); that looks like old/new diff
// residue — confirm that only one of them exists in the real file.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct VirtualColumnMeta {
// Id of the table this record belongs to.
pub table_id: MetaId,

pub virtual_columns: Vec<(String, TableDataType)>,
// The internal virtual field columns of Variant type.
// For example, the data column has the following values:
// `{"id":1,"name":"tom","metas":{"key1":"val1","key2":"val2"}}`
// `{"id":2,"name":"alice","metas":{"key1":"val3","key2":"val4"}}`
// ...
// We can generate virtual columns as follows:
// `data['id']`, `data['name']`, `data['metas']['key1']`, `data['metas']['key2']`
pub virtual_columns: Vec<VirtualField>,
// Creation timestamp of this record.
pub created_on: DateTime<Utc>,
// Timestamp of the last update; `None` when the record was never updated.
pub updated_on: Option<DateTime<Utc>>,
// Whether the virtual columns are auto-generated,
// true for auto-generated, false for user-defined.
pub auto_generated: bool,
}

// Request to create the virtual columns identified by `name_ident`.
// NOTE(review): two `virtual_columns` declarations appear below (tuple form
// and `VirtualField` form); this looks like old/new diff residue — confirm
// that only one of them exists in the real file.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct CreateVirtualColumnReq {
// NOTE(review): presumably selects how an already-existing entry is handled
// on create — confirm against the definition of `CreateOption`.
pub create_option: CreateOption,
// Identifier of the virtual column entry to create.
pub name_ident: VirtualColumnIdent,
pub virtual_columns: Vec<(String, TableDataType)>,
// The virtual field definitions to store.
pub virtual_columns: Vec<VirtualField>,
// Whether the virtual columns are auto-generated (true) or user-defined (false).
pub auto_generated: bool,
}

impl Display for CreateVirtualColumnReq {
Expand All @@ -57,7 +99,8 @@ impl Display for CreateVirtualColumnReq {
// Request to replace the virtual columns stored under `name_ident`.
// NOTE(review): two `virtual_columns` declarations appear below (tuple form
// and `VirtualField` form); this looks like old/new diff residue — confirm
// that only one of them exists in the real file.
pub struct UpdateVirtualColumnReq {
// NOTE(review): presumably suppresses the not-found error when the entry is
// missing — confirm against the update implementation.
pub if_exists: bool,
// Identifier of the virtual column entry to update.
pub name_ident: VirtualColumnIdent,
pub virtual_columns: Vec<(String, TableDataType)>,
// The new virtual field definitions.
pub virtual_columns: Vec<VirtualField>,
// Whether the virtual columns are auto-generated (true) or user-defined (false).
pub auto_generated: bool,
}

impl Display for UpdateVirtualColumnReq {
Expand Down
3 changes: 2 additions & 1 deletion src/meta/proto-conv/src/util.rs
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,8 @@ const META_CHANGE_LOG: &[(u64, &str)] = &[
(115, "2024-12-16: Add: udf.proto: add UDAFScript and UDAFServer"),
(116, "2025-01-09: Add: MarkedDeletedIndexMeta"),
(117, "2025-01-21: Add: config.proto: add disable_list_batch in WebhdfsConfig"),
(118, "2025-01-22: Add: config.proto: add user_name in WebhdfsConfig")
(118, "2025-01-22: Add: config.proto: add user_name in WebhdfsConfig"),
(119, "2025-01-25: Add: virtual_column add alias_names and auto_generated field"),
// Dear developer:
// If you're gonna add a new metadata version, you'll have to add a test for it.
// You could just copy an existing test file(e.g., `../tests/it/v024_table_meta.rs`)
Expand Down
65 changes: 36 additions & 29 deletions src/meta/proto-conv/src/virtual_column_from_to_protobuf_impl.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
//! This mod is the key point about compatibility.
//! Every time you update anything in this file, update the `VER` and let the tests pass.
use std::collections::BTreeMap;

use chrono::DateTime;
use chrono::Utc;
use databend_common_expression::TableDataType;
Expand All @@ -37,30 +39,28 @@ impl FromToProto for mt::VirtualColumnMeta {
fn from_pb(p: Self::PB) -> Result<Self, Incompatible>
where Self: Sized {
reader_check_msg(p.ver, p.min_reader_ver)?;
let virtual_columns = if p.data_types.is_empty() {
p.virtual_columns
.iter()
.map(|v| {
(
v.clone(),
TableDataType::Nullable(Box::new(TableDataType::Variant)),
)
})
.collect()
} else {
if p.virtual_columns.len() != p.data_types.len() {
return Err(Incompatible::new(format!(
"Incompatible virtual columns length is {}, but data types length is {}",
p.virtual_columns.len(),
p.data_types.len()
)));
}
let mut virtual_columns = Vec::new();
for (v, ty) in p.virtual_columns.iter().zip(p.data_types.iter()) {
virtual_columns.push((v.clone(), TableDataType::from_pb(ty.clone())?));
}
virtual_columns
};
if !p.data_types.is_empty() && p.virtual_columns.len() != p.data_types.len() {
return Err(Incompatible::new(format!(
"Incompatible virtual columns length is {}, but data types length is {}",
p.virtual_columns.len(),
p.data_types.len()
)));
}
let mut virtual_columns = Vec::with_capacity(p.virtual_columns.len());
for (i, expr) in p.virtual_columns.iter().enumerate() {
let data_type = if let Some(ty) = p.data_types.get(i) {
TableDataType::from_pb(ty.clone())?
} else {
TableDataType::Nullable(Box::new(TableDataType::Variant))
};
let alias_name = p.alias_names.get(&(i as u64)).cloned();
let virtual_column = mt::VirtualField {
expr: expr.clone(),
data_type,
alias_name,
};
virtual_columns.push(virtual_column);
}

let v = Self {
table_id: p.table_id,
Expand All @@ -70,16 +70,21 @@ impl FromToProto for mt::VirtualColumnMeta {
Some(updated_on) => Some(DateTime::<Utc>::from_pb(updated_on)?),
None => None,
},
auto_generated: p.auto_generated,
};
Ok(v)
}

fn to_pb(&self) -> Result<Self::PB, Incompatible> {
let mut data_types = Vec::new();
let mut virtual_columns = Vec::new();
for (v, ty) in self.virtual_columns.iter() {
data_types.push(ty.to_pb()?);
virtual_columns.push(v.clone());
let mut data_types = Vec::with_capacity(self.virtual_columns.len());
let mut virtual_columns = Vec::with_capacity(self.virtual_columns.len());
let mut alias_names = BTreeMap::new();
for (i, virtual_field) in self.virtual_columns.iter().enumerate() {
data_types.push(virtual_field.data_type.to_pb()?);
virtual_columns.push(virtual_field.expr.clone());
if let Some(alias_name) = &virtual_field.alias_name {
alias_names.insert(i as u64, alias_name.clone());
}
}
let p = pb::VirtualColumnMeta {
ver: VER,
Expand All @@ -92,6 +97,8 @@ impl FromToProto for mt::VirtualColumnMeta {
None => None,
},
data_types,
alias_names,
auto_generated: self.auto_generated,
};
Ok(p)
}
Expand Down
1 change: 1 addition & 0 deletions src/meta/proto-conv/tests/it/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -116,3 +116,4 @@ mod v115_add_udaf_script;
mod v116_marked_deleted_index_meta;
mod v117_webhdfs_add_disable_list_batch;
mod v118_webhdfs_add_user_name;
mod v119_virtual_column;
21 changes: 13 additions & 8 deletions src/meta/proto-conv/tests/it/v041_virtual_column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use chrono::TimeZone;
use chrono::Utc;
use databend_common_expression::TableDataType;
use databend_common_meta_app::schema::VirtualColumnMeta;
use databend_common_meta_app::schema::VirtualField;
use fastrace::func_name;

use crate::common;
Expand All @@ -42,23 +43,27 @@ fn test_decode_v41_virtual_column() -> anyhow::Result<()> {
let want = || {
let table_id = 7;
let virtual_columns = vec![
(
"v:k1:k2".to_string(),
TableDataType::Nullable(Box::new(TableDataType::Variant)),
),
(
"v[1][2]".to_string(),
TableDataType::Nullable(Box::new(TableDataType::Variant)),
),
VirtualField {
expr: "v:k1:k2".to_string(),
data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)),
alias_name: None,
},
VirtualField {
expr: "v[1][2]".to_string(),
data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)),
alias_name: None,
},
];
let created_on = Utc.with_ymd_and_hms(2023, 3, 9, 10, 0, 0).unwrap();
let updated_on = Some(Utc.with_ymd_and_hms(2023, 5, 29, 10, 0, 0).unwrap());
let auto_generated = false;

VirtualColumnMeta {
table_id,
virtual_columns,
created_on,
updated_on,
auto_generated,
}
};

Expand Down
30 changes: 18 additions & 12 deletions src/meta/proto-conv/tests/it/v112_virtual_column.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use chrono::TimeZone;
use chrono::Utc;
use databend_common_expression::TableDataType;
use databend_common_meta_app::schema::VirtualColumnMeta;
use databend_common_meta_app::schema::VirtualField;
use fastrace::func_name;

use crate::common;
Expand Down Expand Up @@ -45,27 +46,32 @@ fn test_decode_v112_virtual_column() -> anyhow::Result<()> {
let want = || {
let table_id = 7;
let virtual_columns = vec![
(
"v:k1:k2".to_string(),
TableDataType::Nullable(Box::new(TableDataType::Variant)),
),
(
"v[1][2]".to_string(),
TableDataType::Nullable(Box::new(TableDataType::Variant)),
),
(
"v:k3:k4".to_string(),
TableDataType::Nullable(Box::new(TableDataType::String)),
),
VirtualField {
expr: "v:k1:k2".to_string(),
data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)),
alias_name: None,
},
VirtualField {
expr: "v[1][2]".to_string(),
data_type: TableDataType::Nullable(Box::new(TableDataType::Variant)),
alias_name: None,
},
VirtualField {
expr: "v:k3:k4".to_string(),
data_type: TableDataType::Nullable(Box::new(TableDataType::String)),
alias_name: None,
},
];
let created_on = Utc.with_ymd_and_hms(2023, 3, 9, 10, 0, 0).unwrap();
let updated_on = Some(Utc.with_ymd_and_hms(2023, 5, 29, 10, 0, 0).unwrap());
let auto_generated = false;

VirtualColumnMeta {
table_id,
virtual_columns,
created_on,
updated_on,
auto_generated,
}
};

Expand Down
83 changes: 83 additions & 0 deletions src/meta/proto-conv/tests/it/v119_virtual_column.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
// Copyright 2021 Datafuse Labs
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

use chrono::TimeZone;
use chrono::Utc;
use databend_common_expression::TableDataType;
use databend_common_meta_app::schema::VirtualColumnMeta;
use databend_common_meta_app::schema::VirtualField;
use fastrace::func_name;

use crate::common;

// These bytes are built when a new version is introduced,
// and are kept for backward compatibility test.
//
// *************************************************************
// * These messages should never be updated, *
// * only be added when a new version is added, *
// * or be removed when an old version is no longer supported. *
// *************************************************************
//
// The message bytes are built from the output of `proto_conv::test_build_pb_buf()`
#[test]
fn test_decode_v119_virtual_column() -> anyhow::Result<()> {
    // Frozen protobuf encoding of a v119 `VirtualColumnMeta`; must never be edited.
    let schema_v119 = vec![
        8, 7, 18, 7, 118, 58, 107, 49, 58, 107, 50, 18, 7, 118, 91, 49, 93, 91, 50, 93, 18, 7, 118,
        58, 107, 51, 58, 107, 52, 26, 23, 50, 48, 50, 51, 45, 48, 51, 45, 48, 57, 32, 49, 48, 58,
        48, 48, 58, 48, 48, 32, 85, 84, 67, 34, 23, 50, 48, 50, 51, 45, 48, 53, 45, 50, 57, 32, 49,
        48, 58, 48, 48, 58, 48, 48, 32, 85, 84, 67, 42, 18, 178, 2, 9, 210, 2, 0, 160, 6, 119, 168,
        6, 24, 160, 6, 119, 168, 6, 24, 42, 18, 178, 2, 9, 210, 2, 0, 160, 6, 119, 168, 6, 24, 160,
        6, 119, 168, 6, 24, 42, 18, 178, 2, 9, 146, 2, 0, 160, 6, 119, 168, 6, 24, 160, 6, 119,
        168, 6, 24, 50, 10, 8, 1, 18, 6, 118, 97, 108, 117, 101, 49, 50, 10, 8, 2, 18, 6, 118, 97,
        108, 117, 101, 50, 56, 1, 160, 6, 119, 168, 6, 24,
    ];

    // Wraps a data type in `Nullable`, as every expected field type is nullable.
    let nullable = |inner: TableDataType| TableDataType::Nullable(Box::new(inner));

    // Builds the value the fixture is expected to decode to: three virtual
    // fields (the first without an alias, the other two aliased) on table 7,
    // flagged as auto-generated.
    let expected = || VirtualColumnMeta {
        table_id: 7,
        virtual_columns: vec![
            VirtualField {
                expr: "v:k1:k2".to_string(),
                data_type: nullable(TableDataType::Variant),
                alias_name: None,
            },
            VirtualField {
                expr: "v[1][2]".to_string(),
                data_type: nullable(TableDataType::Variant),
                alias_name: Some("value1".to_string()),
            },
            VirtualField {
                expr: "v:k3:k4".to_string(),
                data_type: nullable(TableDataType::String),
                alias_name: Some("value2".to_string()),
            },
        ],
        created_on: Utc.with_ymd_and_hms(2023, 3, 9, 10, 0, 0).unwrap(),
        updated_on: Some(Utc.with_ymd_and_hms(2023, 5, 29, 10, 0, 0).unwrap()),
        auto_generated: true,
    };

    // Round-trip through the current protobuf conversion, then decode the
    // frozen v119 bytes and compare against the same expected value.
    common::test_pb_from_to(func_name!(), expected())?;
    common::test_load_old(func_name!(), schema_v119.as_slice(), 119, expected())?;

    Ok(())
}
Loading

0 comments on commit c5e7355

Please sign in to comment.