From c4e5a114169666951190c2084271bc5131c5fbd4 Mon Sep 17 00:00:00 2001 From: NiwakaDev Date: Sun, 24 Sep 2023 17:19:16 +0900 Subject: [PATCH 1/6] feat: support greatest function --- src/common/function/src/scalars/timestamp.rs | 3 + .../src/scalars/timestamp/greatest.rs | 148 ++++++++++++++++++ .../standalone/common/select/dummy.result | 8 + .../cases/standalone/common/select/dummy.sql | 3 +- 4 files changed, 161 insertions(+), 1 deletion(-) create mode 100644 src/common/function/src/scalars/timestamp/greatest.rs diff --git a/src/common/function/src/scalars/timestamp.rs b/src/common/function/src/scalars/timestamp.rs index eb0e1afb1cb1..102bfb934bfc 100644 --- a/src/common/function/src/scalars/timestamp.rs +++ b/src/common/function/src/scalars/timestamp.rs @@ -12,8 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. use std::sync::Arc; +mod greatest; mod to_unixtime; +use greatest::GreatestFunction; use to_unixtime::ToUnixtimeFunction; use crate::scalars::function_registry::FunctionRegistry; @@ -23,5 +25,6 @@ pub(crate) struct TimestampFunction; impl TimestampFunction { pub fn register(registry: &FunctionRegistry) { registry.register(Arc::new(ToUnixtimeFunction)); + registry.register(Arc::new(GreatestFunction)); } } diff --git a/src/common/function/src/scalars/timestamp/greatest.rs b/src/common/function/src/scalars/timestamp/greatest.rs new file mode 100644 index 000000000000..0233b480a5b2 --- /dev/null +++ b/src/common/function/src/scalars/timestamp/greatest.rs @@ -0,0 +1,148 @@ +// Copyright 2023 Greptime Team +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +use std::fmt::{self}; +use std::str::FromStr; + +use common_query::error::{ + self, ArrowComputeSnafu, InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu, +}; +use common_query::prelude::{Signature, Volatility}; +use common_time::Date; +use datatypes::arrow::array::Date32Array; +use datatypes::arrow::compute::kernels::comparison::gt_dyn; +use datatypes::arrow::compute::kernels::zip; +use datatypes::prelude::ConcreteDataType; +use datatypes::vectors::{Helper, StringVector, Vector, VectorRef}; +use snafu::{ensure, ResultExt}; + +use crate::scalars::function::{Function, FunctionContext}; + +#[derive(Clone, Debug, Default)] +pub struct GreatestFunction; + +const NAME: &str = "greatest"; + +pub fn convert_to_date(arg: &str) -> Option { + match Date::from_str(arg) { + Ok(ts) => Some(ts.val()), + Err(_err) => None, + } +} + +fn to_primitive_array(column: &VectorRef) -> Date32Array { + let column = StringVector::try_from_arrow_array(column.to_arrow_array()).unwrap(); + let column = (0..column.len()) + .map(|idx| convert_to_date(&column.get(idx).to_string())) + .collect::>(); + Date32Array::from_iter(column) +} + +impl Function for GreatestFunction { + fn name(&self) -> &str { + NAME + } + + fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result { + Ok(ConcreteDataType::date_datatype()) + } + + fn signature(&self) -> Signature { + Signature::uniform( + 2, + vec![ConcreteDataType::string_datatype()], + Volatility::Immutable, + ) + } + + fn eval(&self, _func_ctx: FunctionContext, columns: &[VectorRef]) -> Result { + ensure!( + columns.len() == 2, + InvalidFuncArgsSnafu { + err_msg: format!( + "The length of the args is not correct, expect exactly one, have: {}", + columns.len() + ), + } + ); + match columns[0].data_type() { + ConcreteDataType::String(_) => { + let column1 = to_primitive_array(&columns[0]); + let column2 = to_primitive_array(&columns[1]); + let boolean_array = gt_dyn(&column1, &column2).context(ArrowComputeSnafu)?; + let result = + zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?; + Ok(Helper::try_into_vector(&result).context(error::FromArrowArraySnafu)?) + } + _ => UnsupportedInputDataTypeSnafu { + function: NAME, + datatypes: columns.iter().map(|c| c.data_type()).collect::>(), + } + .fail(), + } + } +} + +impl fmt::Display for GreatestFunction { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "GREATEST") + } +} + +#[cfg(test)] +mod tests { + use std::str::FromStr; + use std::sync::Arc; + + use common_time::Date; + use datatypes::prelude::ConcreteDataType; + use datatypes::types::DateType; + use datatypes::value::Value; + use datatypes::vectors::{DateVector, StringVector, Vector}; + + use super::GreatestFunction; + use crate::scalars::function::FunctionContext; + use crate::scalars::Function; + + #[test] + fn test_greatest() { + let function = GreatestFunction; + assert_eq!( + function.return_type(&[]).unwrap(), + ConcreteDataType::Date(DateType) + ); + let columns = vec![ + Arc::new(StringVector::from(vec![ + "1970-01-01".to_string(), + "2012-12-23".to_string(), + ])) as _, + Arc::new(StringVector::from(vec![ + "2001-02-01".to_string(), + "1999-01-01".to_string(), + ])) as _, + ]; + + let result = function.eval(FunctionContext::default(), &columns).unwrap(); + let result = result.as_any().downcast_ref::().unwrap(); + assert_eq!(result.len(), 2); + assert_eq!( + result.get(0), + Value::Date(Date::from_str("2001-02-01").unwrap()) + ); + assert_eq!( + result.get(1), + Value::Date(Date::from_str("2012-12-23").unwrap()) + ); + } +} diff --git a/tests/cases/standalone/common/select/dummy.result b/tests/cases/standalone/common/select/dummy.result index 58845ddf5860..d967a6510945 100644 --- a/tests/cases/standalone/common/select/dummy.result +++ b/tests/cases/standalone/common/select/dummy.result @@ -34,6 +34,14 @@ select * where "a" = "A"; Error: 3000(PlanQuery), No field named a. +select GREATEST('1999-01-30', '2023-03-01'); + ++-------------------------------------------------+ +| greatest(Utf8("1999-01-30"),Utf8("2023-03-01")) | ++-------------------------------------------------+ +| 2023-03-01 | ++-------------------------------------------------+ + select TO_UNIXTIME('2023-03-01T06:35:02Z'); +-------------------------------------------+ diff --git a/tests/cases/standalone/common/select/dummy.sql b/tests/cases/standalone/common/select/dummy.sql index 81ef9324a7ce..0e3da49ed3b1 100644 --- a/tests/cases/standalone/common/select/dummy.sql +++ b/tests/cases/standalone/common/select/dummy.sql @@ -10,6 +10,8 @@ select "A"; select * where "a" = "A"; +select GREATEST('1999-01-30', '2023-03-01'); + select TO_UNIXTIME('2023-03-01T06:35:02Z'); select TO_UNIXTIME(' 2023-03-01T06:35:02Z '); @@ -31,4 +33,3 @@ select b from test_unixtime; select TO_UNIXTIME(b) from test_unixtime; DROP TABLE test_unixtime; - From b790aa6dbba49c42b6dfff7febe3ab0e562c1d51 Mon Sep 17 00:00:00 2001 From: NiwakaDev Date: Tue, 26 Sep 2023 20:35:28 +0900 Subject: [PATCH 2/6] feat: make greatest take date_type as input --- .../src/scalars/timestamp/greatest.rs | 67 ++++++++++++++++--- .../standalone/common/select/dummy.result | 8 +++ .../cases/standalone/common/select/dummy.sql | 2 + 3 files changed, 69 insertions(+), 8 deletions(-) diff --git a/src/common/function/src/scalars/timestamp/greatest.rs b/src/common/function/src/scalars/timestamp/greatest.rs index 0233b480a5b2..992fb6153ae2 100644 --- a/src/common/function/src/scalars/timestamp/greatest.rs +++ b/src/common/function/src/scalars/timestamp/greatest.rs @@ -24,7 +24,7 @@ use datatypes::arrow::array::Date32Array; use datatypes::arrow::compute::kernels::comparison::gt_dyn; use datatypes::arrow::compute::kernels::zip; use datatypes::prelude::ConcreteDataType; -use datatypes::vectors::{Helper, StringVector, Vector, VectorRef}; +use datatypes::vectors::{DateVector, Helper, StringVector, Vector, VectorRef}; use snafu::{ensure, ResultExt}; use crate::scalars::function::{Function, FunctionContext}; @@ -34,14 +34,14 @@ pub struct GreatestFunction; const NAME: &str = "greatest"; -pub fn convert_to_date(arg: &str) -> Option { +fn convert_to_date(arg: &str) -> Option { match Date::from_str(arg) { Ok(ts) => Some(ts.val()), Err(_err) => None, } } -fn to_primitive_array(column: &VectorRef) -> Date32Array { +fn string_vector_to_date32_array(column: &VectorRef) -> Date32Array { let column = StringVector::try_from_arrow_array(column.to_arrow_array()).unwrap(); let column = (0..column.len()) .map(|idx| convert_to_date(&column.get(idx).to_string())) @@ -49,6 +49,21 @@ fn to_primitive_array(column: &VectorRef) -> Date32Array { Date32Array::from_iter(column) } +fn date_vector_to_date32_array(column: &VectorRef) -> Date32Array { + let column = DateVector::try_from_arrow_array(column.to_arrow_array()).unwrap(); + let column = (0..column.len()) + .map(|idx| { + column + .get(idx) + .as_value_ref() + .as_date() + .unwrap() + .map(|x| x.val()) + }) + .collect::>(); + Date32Array::from_iter(column) +} + impl Function for GreatestFunction { fn name(&self) -> &str { NAME @@ -61,7 +76,10 @@ impl Function for GreatestFunction { fn signature(&self) -> Signature { Signature::uniform( 2, - vec![ConcreteDataType::string_datatype()], + vec![ + ConcreteDataType::string_datatype(), + ConcreteDataType::date_datatype(), + ], Volatility::Immutable, ) } @@ -71,15 +89,23 @@ impl Function for GreatestFunction { columns.len() == 2, InvalidFuncArgsSnafu { err_msg: format!( - "The length of the args is not correct, expect exactly one, have: {}", + "The length of the args is not correct, expect exactly two, have: {}", columns.len() ), } ); match columns[0].data_type() { ConcreteDataType::String(_) => { - let column1 = to_primitive_array(&columns[0]); - let column2 = to_primitive_array(&columns[1]); + let column1 = string_vector_to_date32_array(&columns[0]); + let column2 = string_vector_to_date32_array(&columns[1]); + let boolean_array = gt_dyn(&column1, &column2).context(ArrowComputeSnafu)?; + let result = + zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?; + Ok(Helper::try_into_vector(&result).context(error::FromArrowArraySnafu)?) + } + ConcreteDataType::Date(_) => { + let column1 = date_vector_to_date32_array(&columns[0]); + let column2 = date_vector_to_date32_array(&columns[1]); let boolean_array = gt_dyn(&column1, &column2).context(ArrowComputeSnafu)?; let result = zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?; @@ -116,7 +142,7 @@ mod tests { use crate::scalars::Function; #[test] - fn test_greatest() { + fn test_greatest_takes_string_vector() { let function = GreatestFunction; assert_eq!( function.return_type(&[]).unwrap(), @@ -145,4 +171,29 @@ mod tests { Value::Date(Date::from_str("2012-12-23").unwrap()) ); } + + #[test] + fn test_greatest_takes_date_vector() { + let function = GreatestFunction; + assert_eq!( + function.return_type(&[]).unwrap(), + ConcreteDataType::Date(DateType) + ); + let columns = vec![ + Arc::new(DateVector::from_slice(vec![-1, 2])) as _, + Arc::new(DateVector::from_slice(vec![0, 1])) as _, + ]; + + let result = function.eval(FunctionContext::default(), &columns).unwrap(); + let result = result.as_any().downcast_ref::().unwrap(); + assert_eq!(result.len(), 2); + assert_eq!( + result.get(0), + Value::Date(Date::from_str("1970-01-01").unwrap()) + ); + assert_eq!( + result.get(1), + Value::Date(Date::from_str("1970-01-03").unwrap()) + ); + } } diff --git a/tests/cases/standalone/common/select/dummy.result b/tests/cases/standalone/common/select/dummy.result index d967a6510945..c44da94100bd 100644 --- a/tests/cases/standalone/common/select/dummy.result +++ b/tests/cases/standalone/common/select/dummy.result @@ -42,6 +42,14 @@ select GREATEST('1999-01-30', '2023-03-01'); | 2023-03-01 | +-------------------------------------------------+ +select greatest('2000-02-11'::Date, '2020-12-30'::Date); + ++-------------------------------------------------+ +| greatest(Utf8("2000-02-11"),Utf8("2020-12-30")) | ++-------------------------------------------------+ +| 2020-12-30 | ++-------------------------------------------------+ + select TO_UNIXTIME('2023-03-01T06:35:02Z'); +-------------------------------------------+ diff --git a/tests/cases/standalone/common/select/dummy.sql b/tests/cases/standalone/common/select/dummy.sql index 0e3da49ed3b1..14f36b3e9f09 100644 --- a/tests/cases/standalone/common/select/dummy.sql +++ b/tests/cases/standalone/common/select/dummy.sql @@ -12,6 +12,8 @@ select * where "a" = "A"; select GREATEST('1999-01-30', '2023-03-01'); +select greatest('2000-02-11'::Date, '2020-12-30'::Date); + select TO_UNIXTIME('2023-03-01T06:35:02Z'); select TO_UNIXTIME(' 2023-03-01T06:35:02Z '); From c3331b113df52b6ca60b8b003b161d42b132688a Mon Sep 17 00:00:00 2001 From: NiwakaDev Date: Wed, 27 Sep 2023 12:33:41 +0900 Subject: [PATCH 3/6] fix: move sqlness test into common/function/time.sql --- .../cases/standalone/common/function/time.result | 16 ++++++++++++++++ tests/cases/standalone/common/function/time.sql | 4 ++++ .../cases/standalone/common/select/dummy.result | 16 ---------------- tests/cases/standalone/common/select/dummy.sql | 4 ---- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/tests/cases/standalone/common/function/time.result b/tests/cases/standalone/common/function/time.result index 5d5898fe1db4..123b6a3f2f7c 100644 --- a/tests/cases/standalone/common/function/time.result +++ b/tests/cases/standalone/common/function/time.result @@ -4,3 +4,19 @@ select current_time(); ++|current_time()|++|TIME|++ +select GREATEST('1999-01-30', '2023-03-01'); + ++-------------------------------------------------+ +| greatest(Utf8("1999-01-30"),Utf8("2023-03-01")) | ++-------------------------------------------------+ +| 2023-03-01 | ++-------------------------------------------------+ + +select GREATEST('2000-02-11'::Date, '2020-12-30'::Date); + ++-------------------------------------------------+ +| greatest(Utf8("2000-02-11"),Utf8("2020-12-30")) | ++-------------------------------------------------+ +| 2020-12-30 | ++-------------------------------------------------+ + diff --git a/tests/cases/standalone/common/function/time.sql b/tests/cases/standalone/common/function/time.sql index fd45687b424f..46d5c2347fd5 100644 --- a/tests/cases/standalone/common/function/time.sql +++ b/tests/cases/standalone/common/function/time.sql @@ -1,3 +1,7 @@ -- SQLNESS REPLACE (\d+:\d+:\d+\.\d+) TIME -- SQLNESS REPLACE [\s\-]+ select current_time(); + +select GREATEST('1999-01-30', '2023-03-01'); + +select GREATEST('2000-02-11'::Date, '2020-12-30'::Date); diff --git a/tests/cases/standalone/common/select/dummy.result b/tests/cases/standalone/common/select/dummy.result index c44da94100bd..58845ddf5860 100644 --- a/tests/cases/standalone/common/select/dummy.result +++ b/tests/cases/standalone/common/select/dummy.result @@ -34,22 +34,6 @@ select * where "a" = "A"; Error: 3000(PlanQuery), No field named a. -select GREATEST('1999-01-30', '2023-03-01'); - -+-------------------------------------------------+ -| greatest(Utf8("1999-01-30"),Utf8("2023-03-01")) | -+-------------------------------------------------+ -| 2023-03-01 | -+-------------------------------------------------+ - -select greatest('2000-02-11'::Date, '2020-12-30'::Date); - -+-------------------------------------------------+ -| greatest(Utf8("2000-02-11"),Utf8("2020-12-30")) | -+-------------------------------------------------+ -| 2020-12-30 | -+-------------------------------------------------+ - select TO_UNIXTIME('2023-03-01T06:35:02Z'); +-------------------------------------------+ diff --git a/tests/cases/standalone/common/select/dummy.sql b/tests/cases/standalone/common/select/dummy.sql index 14f36b3e9f09..4690900f50b9 100644 --- a/tests/cases/standalone/common/select/dummy.sql +++ b/tests/cases/standalone/common/select/dummy.sql @@ -10,10 +10,6 @@ select "A"; select * where "a" = "A"; -select GREATEST('1999-01-30', '2023-03-01'); - -select greatest('2000-02-11'::Date, '2020-12-30'::Date); - select TO_UNIXTIME('2023-03-01T06:35:02Z'); select TO_UNIXTIME(' 2023-03-01T06:35:02Z '); From 4cf45fe163d75c880416f5752b7a555d9554bf8e Mon Sep 17 00:00:00 2001 From: NiwakaDev Date: Wed, 27 Sep 2023 19:24:32 +0900 Subject: [PATCH 4/6] fix: avoid using unwarp --- .../src/scalars/timestamp/greatest.rs | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/src/common/function/src/scalars/timestamp/greatest.rs b/src/common/function/src/scalars/timestamp/greatest.rs index 992fb6153ae2..48a8a002faca 100644 --- a/src/common/function/src/scalars/timestamp/greatest.rs +++ b/src/common/function/src/scalars/timestamp/greatest.rs @@ -16,7 +16,8 @@ use std::fmt::{self}; use std::str::FromStr; use common_query::error::{ - self, ArrowComputeSnafu, InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu, + self, ArrowComputeSnafu, FromArrowArraySnafu, InvalidFuncArgsSnafu, Result, + UnsupportedInputDataTypeSnafu, }; use common_query::prelude::{Signature, Volatility}; use common_time::Date; @@ -41,16 +42,18 @@ fn convert_to_date(arg: &str) -> Option { } } -fn string_vector_to_date32_array(column: &VectorRef) -> Date32Array { - let column = StringVector::try_from_arrow_array(column.to_arrow_array()).unwrap(); +fn string_vector_to_date32_array(column: &VectorRef) -> Result { + let column = + StringVector::try_from_arrow_array(column.to_arrow_array()).context(FromArrowArraySnafu)?; let column = (0..column.len()) .map(|idx| convert_to_date(&column.get(idx).to_string())) .collect::>(); - Date32Array::from_iter(column) + Ok(Date32Array::from_iter(column)) } -fn date_vector_to_date32_array(column: &VectorRef) -> Date32Array { - let column = DateVector::try_from_arrow_array(column.to_arrow_array()).unwrap(); +fn date_vector_to_date32_array(column: &VectorRef) -> Result { + let column = + DateVector::try_from_arrow_array(column.to_arrow_array()).context(FromArrowArraySnafu)?; let column = (0..column.len()) .map(|idx| { column @@ -61,7 +64,7 @@ fn date_vector_to_date32_array(column: &VectorRef) -> Date32Array { .map(|x| x.val()) }) .collect::>(); - Date32Array::from_iter(column) + Ok(Date32Array::from_iter(column)) } impl Function for GreatestFunction { @@ -96,16 +99,16 @@ impl Function for GreatestFunction { ); match columns[0].data_type() { ConcreteDataType::String(_) => { - let column1 = string_vector_to_date32_array(&columns[0]); - let column2 = string_vector_to_date32_array(&columns[1]); + let column1 = string_vector_to_date32_array(&columns[0])?; + let column2 = string_vector_to_date32_array(&columns[1])?; let boolean_array = gt_dyn(&column1, &column2).context(ArrowComputeSnafu)?; let result = zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?; Ok(Helper::try_into_vector(&result).context(error::FromArrowArraySnafu)?) } ConcreteDataType::Date(_) => { - let column1 = date_vector_to_date32_array(&columns[0]); - let column2 = date_vector_to_date32_array(&columns[1]); + let column1 = date_vector_to_date32_array(&columns[0])?; + let column2 = date_vector_to_date32_array(&columns[1])?; let boolean_array = gt_dyn(&column1, &column2).context(ArrowComputeSnafu)?; let result = zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?; From fd6f82df6e8cbac7230d90404e0f60041db329a6 Mon Sep 17 00:00:00 2001 From: NiwakaDev Date: Thu, 28 Sep 2023 08:18:03 +0900 Subject: [PATCH 5/6] fix: use downcast --- .../src/scalars/timestamp/greatest.rs | 56 ++++++++++++++++--- 1 file changed, 49 insertions(+), 7 deletions(-) diff --git a/src/common/function/src/scalars/timestamp/greatest.rs b/src/common/function/src/scalars/timestamp/greatest.rs index 48a8a002faca..0a5f036ac760 100644 --- a/src/common/function/src/scalars/timestamp/greatest.rs +++ b/src/common/function/src/scalars/timestamp/greatest.rs @@ -16,7 +16,7 @@ use std::fmt::{self}; use std::str::FromStr; use common_query::error::{ - self, ArrowComputeSnafu, FromArrowArraySnafu, InvalidFuncArgsSnafu, Result, + self, ArrowComputeSnafu, DowncastVectorSnafu, InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu, }; use common_query::prelude::{Signature, Volatility}; @@ -25,8 +25,8 @@ use datatypes::arrow::array::Date32Array; use datatypes::arrow::compute::kernels::comparison::gt_dyn; use datatypes::arrow::compute::kernels::zip; use datatypes::prelude::ConcreteDataType; -use datatypes::vectors::{DateVector, Helper, StringVector, Vector, VectorRef}; -use snafu::{ensure, ResultExt}; +use datatypes::vectors::{ConstantVector, DateVector, Helper, StringVector, Vector, VectorRef}; +use snafu::{ensure, OptionExt, ResultExt}; use crate::scalars::function::{Function, FunctionContext}; @@ -43,8 +43,29 @@ fn convert_to_date(arg: &str) -> Option { } fn string_vector_to_date32_array(column: &VectorRef) -> Result { - let column = - StringVector::try_from_arrow_array(column.to_arrow_array()).context(FromArrowArraySnafu)?; + let column = if column.is_const() { + let column: &ConstantVector = unsafe { Helper::static_cast(column) }; + column + .inner() + .as_any() + .downcast_ref::() + .context(DowncastVectorSnafu { + err_msg: format!( + "expect StringVector, got vector type {}", + column.vector_type_name() + ), + })? + } else { + column + .as_any() + .downcast_ref::() + .context(DowncastVectorSnafu { + err_msg: format!( + "expect StringVector, got vector type {}", + column.vector_type_name() + ), + })? + }; let column = (0..column.len()) .map(|idx| convert_to_date(&column.get(idx).to_string())) .collect::>(); @@ -52,8 +73,29 @@ fn string_vector_to_date32_array(column: &VectorRef) -> Result { } fn date_vector_to_date32_array(column: &VectorRef) -> Result { - let column = - DateVector::try_from_arrow_array(column.to_arrow_array()).context(FromArrowArraySnafu)?; + let column = if column.is_const() { + let column: &ConstantVector = unsafe { Helper::static_cast(column) }; + column + .inner() + .as_any() + .downcast_ref::() + .context(DowncastVectorSnafu { + err_msg: format!( + "expect DateVector, got vector type {}", + column.vector_type_name() + ), + })? + } else { + column + .as_any() + .downcast_ref::() + .context(DowncastVectorSnafu { + err_msg: format!( + "expect DateVector, got vector type {}", + column.vector_type_name() + ), + })? + }; let column = (0..column.len()) .map(|idx| { column From 4e2539a6dc02188f54173559b0a3b7af3a0fbaec Mon Sep 17 00:00:00 2001 From: NiwakaDev Date: Thu, 28 Sep 2023 19:12:05 +0900 Subject: [PATCH 6/6] refactor: simplify arrow cast --- .../src/scalars/timestamp/greatest.rs | 101 +++--------------- 1 file changed, 16 insertions(+), 85 deletions(-) diff --git a/src/common/function/src/scalars/timestamp/greatest.rs b/src/common/function/src/scalars/timestamp/greatest.rs index 0a5f036ac760..e583872d5a24 100644 --- a/src/common/function/src/scalars/timestamp/greatest.rs +++ b/src/common/function/src/scalars/timestamp/greatest.rs @@ -13,20 +13,19 @@ // limitations under the License. use std::fmt::{self}; -use std::str::FromStr; use common_query::error::{ - self, ArrowComputeSnafu, DowncastVectorSnafu, InvalidFuncArgsSnafu, Result, - UnsupportedInputDataTypeSnafu, + self, ArrowComputeSnafu, InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu, }; use common_query::prelude::{Signature, Volatility}; -use common_time::Date; -use datatypes::arrow::array::Date32Array; +use datatypes::arrow::array::AsArray; +use datatypes::arrow::compute::cast; use datatypes::arrow::compute::kernels::comparison::gt_dyn; use datatypes::arrow::compute::kernels::zip; +use datatypes::arrow::datatypes::{DataType as ArrowDataType, Date32Type}; use datatypes::prelude::ConcreteDataType; -use datatypes::vectors::{ConstantVector, DateVector, Helper, StringVector, Vector, VectorRef}; -use snafu::{ensure, OptionExt, ResultExt}; +use datatypes::vectors::{Helper, VectorRef}; +use snafu::{ensure, ResultExt}; use crate::scalars::function::{Function, FunctionContext}; @@ -35,80 +34,6 @@ pub struct GreatestFunction; const NAME: &str = "greatest"; -fn convert_to_date(arg: &str) -> Option { - match Date::from_str(arg) { - Ok(ts) => Some(ts.val()), - Err(_err) => None, - } -} - -fn string_vector_to_date32_array(column: &VectorRef) -> Result { - let column = if column.is_const() { - let column: &ConstantVector = unsafe { Helper::static_cast(column) }; - column - .inner() - .as_any() - .downcast_ref::() - .context(DowncastVectorSnafu { - err_msg: format!( - "expect StringVector, got vector type {}", - column.vector_type_name() - ), - })? - } else { - column - .as_any() - .downcast_ref::() - .context(DowncastVectorSnafu { - err_msg: format!( - "expect StringVector, got vector type {}", - column.vector_type_name() - ), - })? - }; - let column = (0..column.len()) - .map(|idx| convert_to_date(&column.get(idx).to_string())) - .collect::>(); - Ok(Date32Array::from_iter(column)) -} - -fn date_vector_to_date32_array(column: &VectorRef) -> Result { - let column = if column.is_const() { - let column: &ConstantVector = unsafe { Helper::static_cast(column) }; - column - .inner() - .as_any() - .downcast_ref::() - .context(DowncastVectorSnafu { - err_msg: format!( - "expect DateVector, got vector type {}", - column.vector_type_name() - ), - })? - } else { - column - .as_any() - .downcast_ref::() - .context(DowncastVectorSnafu { - err_msg: format!( - "expect DateVector, got vector type {}", - column.vector_type_name() - ), - })? - }; - let column = (0..column.len()) - .map(|idx| { - column - .get(idx) - .as_value_ref() - .as_date() - .unwrap() - .map(|x| x.val()) - }) - .collect::>(); - Ok(Date32Array::from_iter(column)) -} - impl Function for GreatestFunction { fn name(&self) -> &str { NAME @@ -141,16 +66,22 @@ impl Function for GreatestFunction { ); match columns[0].data_type() { ConcreteDataType::String(_) => { - let column1 = string_vector_to_date32_array(&columns[0])?; - let column2 = string_vector_to_date32_array(&columns[1])?; + let column1 = cast(&columns[0].to_arrow_array(), &ArrowDataType::Date32) + .context(ArrowComputeSnafu)?; + let column1 = column1.as_primitive::(); + let column2 = cast(&columns[1].to_arrow_array(), &ArrowDataType::Date32) + .context(ArrowComputeSnafu)?; + let column2 = column2.as_primitive::(); let boolean_array = gt_dyn(&column1, &column2).context(ArrowComputeSnafu)?; let result = zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?; Ok(Helper::try_into_vector(&result).context(error::FromArrowArraySnafu)?) } ConcreteDataType::Date(_) => { - let column1 = date_vector_to_date32_array(&columns[0])?; - let column2 = date_vector_to_date32_array(&columns[1])?; + let column1 = columns[0].to_arrow_array(); + let column1 = column1.as_primitive::(); + let column2 = columns[1].to_arrow_array(); + let column2 = column2.as_primitive::(); let boolean_array = gt_dyn(&column1, &column2).context(ArrowComputeSnafu)?; let result = zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?;