Skip to content

Commit

Permalink
feat: make greatest supports timestamp and datetime types (#5005)
Browse files Browse the repository at this point in the history
* feat: make greatest supports timestamp and datetime types

* chore: style

Co-authored-by: Lei, HUANG <[email protected]>

* refactor: greatest with gt_time_types macro

---------

Co-authored-by: Lei, HUANG <[email protected]>
  • Loading branch information
killme2008 and v0y4g3r authored Nov 19, 2024
1 parent 73e6bf3 commit 0dd02e9
Show file tree
Hide file tree
Showing 3 changed files with 181 additions and 28 deletions.
197 changes: 170 additions & 27 deletions src/common/function/src/scalars/timestamp/greatest.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,12 @@ use datafusion::arrow::compute::kernels::cmp::gt;
use datatypes::arrow::array::AsArray;
use datatypes::arrow::compute::cast;
use datatypes::arrow::compute::kernels::zip;
use datatypes::arrow::datatypes::{DataType as ArrowDataType, Date32Type};
use datatypes::arrow::datatypes::{
DataType as ArrowDataType, Date32Type, Date64Type, TimestampMicrosecondType,
TimestampMillisecondType, TimestampNanosecondType, TimestampSecondType,
};
use datatypes::prelude::ConcreteDataType;
use datatypes::types::TimestampType;
use datatypes::vectors::{Helper, VectorRef};
use snafu::{ensure, ResultExt};

Expand All @@ -34,13 +38,47 @@ pub struct GreatestFunction;

const NAME: &str = "greatest";

macro_rules! gt_time_types {
($ty: ident, $columns:expr) => {{
let column1 = $columns[0].to_arrow_array();
let column2 = $columns[1].to_arrow_array();

let column1 = column1.as_primitive::<$ty>();
let column2 = column2.as_primitive::<$ty>();
let boolean_array = gt(&column1, &column2).context(ArrowComputeSnafu)?;

let result = zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?;
Helper::try_into_vector(&result).context(error::FromArrowArraySnafu)
}};
}

impl Function for GreatestFunction {
fn name(&self) -> &str {
NAME
}

fn return_type(&self, _input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
Ok(ConcreteDataType::date_datatype())
fn return_type(&self, input_types: &[ConcreteDataType]) -> Result<ConcreteDataType> {
ensure!(
input_types.len() == 2,
InvalidFuncArgsSnafu {
err_msg: format!(
"The length of the args is not correct, expect exactly two, have: {}",
input_types.len()
)
}
);

match &input_types[0] {
ConcreteDataType::String(_) => Ok(ConcreteDataType::datetime_datatype()),
ConcreteDataType::Date(_) => Ok(ConcreteDataType::date_datatype()),
ConcreteDataType::DateTime(_) => Ok(ConcreteDataType::datetime_datatype()),
ConcreteDataType::Timestamp(ts_type) => Ok(ConcreteDataType::Timestamp(*ts_type)),
_ => UnsupportedInputDataTypeSnafu {
function: NAME,
datatypes: input_types,
}
.fail(),
}
}

fn signature(&self) -> Signature {
Expand All @@ -49,6 +87,11 @@ impl Function for GreatestFunction {
vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::date_datatype(),
ConcreteDataType::datetime_datatype(),
ConcreteDataType::timestamp_nanosecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_second_datatype(),
],
Volatility::Immutable,
)
Expand All @@ -66,27 +109,32 @@ impl Function for GreatestFunction {
);
match columns[0].data_type() {
ConcreteDataType::String(_) => {
let column1 = cast(&columns[0].to_arrow_array(), &ArrowDataType::Date32)
// Treats string as `DateTime` type.
let column1 = cast(&columns[0].to_arrow_array(), &ArrowDataType::Date64)
.context(ArrowComputeSnafu)?;
let column1 = column1.as_primitive::<Date32Type>();
let column2 = cast(&columns[1].to_arrow_array(), &ArrowDataType::Date32)
let column1 = column1.as_primitive::<Date64Type>();
let column2 = cast(&columns[1].to_arrow_array(), &ArrowDataType::Date64)
.context(ArrowComputeSnafu)?;
let column2 = column2.as_primitive::<Date32Type>();
let boolean_array = gt(&column1, &column2).context(ArrowComputeSnafu)?;
let result =
zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?;
Ok(Helper::try_into_vector(&result).context(error::FromArrowArraySnafu)?)
}
ConcreteDataType::Date(_) => {
let column1 = columns[0].to_arrow_array();
let column1 = column1.as_primitive::<Date32Type>();
let column2 = columns[1].to_arrow_array();
let column2 = column2.as_primitive::<Date32Type>();
let column2 = column2.as_primitive::<Date64Type>();
let boolean_array = gt(&column1, &column2).context(ArrowComputeSnafu)?;
let result =
zip::zip(&boolean_array, &column1, &column2).context(ArrowComputeSnafu)?;
Ok(Helper::try_into_vector(&result).context(error::FromArrowArraySnafu)?)
}
ConcreteDataType::Date(_) => gt_time_types!(Date32Type, columns),
ConcreteDataType::DateTime(_) => gt_time_types!(Date64Type, columns),
ConcreteDataType::Timestamp(ts_type) => match ts_type {
TimestampType::Second(_) => gt_time_types!(TimestampSecondType, columns),
TimestampType::Millisecond(_) => {
gt_time_types!(TimestampMillisecondType, columns)
}
TimestampType::Microsecond(_) => {
gt_time_types!(TimestampMicrosecondType, columns)
}
TimestampType::Nanosecond(_) => {
gt_time_types!(TimestampNanosecondType, columns)
}
},
_ => UnsupportedInputDataTypeSnafu {
function: NAME,
datatypes: columns.iter().map(|c| c.data_type()).collect::<Vec<_>>(),
Expand All @@ -106,19 +154,31 @@ impl fmt::Display for GreatestFunction {
mod tests {
use std::sync::Arc;

use common_time::Date;
use datatypes::prelude::ConcreteDataType;
use datatypes::types::DateType;
use common_time::timestamp::TimeUnit;
use common_time::{Date, DateTime, Timestamp};
use datatypes::types::{
DateTimeType, DateType, TimestampMicrosecondType, TimestampMillisecondType,
TimestampNanosecondType, TimestampSecondType,
};
use datatypes::value::Value;
use datatypes::vectors::{DateVector, StringVector, Vector};
use datatypes::vectors::{
DateTimeVector, DateVector, StringVector, TimestampMicrosecondVector,
TimestampMillisecondVector, TimestampNanosecondVector, TimestampSecondVector, Vector,
};
use paste::paste;

use super::*;
#[test]
fn test_greatest_takes_string_vector() {
let function = GreatestFunction;
assert_eq!(
function.return_type(&[]).unwrap(),
ConcreteDataType::Date(DateType)
function
.return_type(&[
ConcreteDataType::string_datatype(),
ConcreteDataType::string_datatype()
])
.unwrap(),
ConcreteDataType::DateTime(DateTimeType)
);
let columns = vec![
Arc::new(StringVector::from(vec![
Expand All @@ -132,25 +192,31 @@ mod tests {
];

let result = function.eval(FunctionContext::default(), &columns).unwrap();
let result = result.as_any().downcast_ref::<DateVector>().unwrap();
let result = result.as_any().downcast_ref::<DateTimeVector>().unwrap();
assert_eq!(result.len(), 2);
assert_eq!(
result.get(0),
Value::Date(Date::from_str_utc("2001-02-01").unwrap())
Value::DateTime(DateTime::from_str("2001-02-01 00:00:00", None).unwrap())
);
assert_eq!(
result.get(1),
Value::Date(Date::from_str_utc("2012-12-23").unwrap())
Value::DateTime(DateTime::from_str("2012-12-23 00:00:00", None).unwrap())
);
}

#[test]
fn test_greatest_takes_date_vector() {
let function = GreatestFunction;
assert_eq!(
function.return_type(&[]).unwrap(),
function
.return_type(&[
ConcreteDataType::date_datatype(),
ConcreteDataType::date_datatype()
])
.unwrap(),
ConcreteDataType::Date(DateType)
);

let columns = vec![
Arc::new(DateVector::from_slice(vec![-1, 2])) as _,
Arc::new(DateVector::from_slice(vec![0, 1])) as _,
Expand All @@ -168,4 +234,81 @@ mod tests {
Value::Date(Date::from_str_utc("1970-01-03").unwrap())
);
}

#[test]
fn test_greatest_takes_datetime_vector() {
let function = GreatestFunction;
assert_eq!(
function
.return_type(&[
ConcreteDataType::datetime_datatype(),
ConcreteDataType::datetime_datatype()
])
.unwrap(),
ConcreteDataType::DateTime(DateTimeType)
);

let columns = vec![
Arc::new(DateTimeVector::from_slice(vec![-1, 2])) as _,
Arc::new(DateTimeVector::from_slice(vec![0, 1])) as _,
];

let result = function.eval(FunctionContext::default(), &columns).unwrap();
let result = result.as_any().downcast_ref::<DateTimeVector>().unwrap();
assert_eq!(result.len(), 2);
assert_eq!(
result.get(0),
Value::DateTime(DateTime::from_str("1970-01-01 00:00:00", None).unwrap())
);
assert_eq!(
result.get(1),
Value::DateTime(DateTime::from_str("1970-01-01 00:00:00.002", None).unwrap())
);
}

macro_rules! test_timestamp {
($type: expr,$unit: ident) => {
paste! {
#[test]
fn [<test_greatest_takes_ $unit:lower _vector>]() {
let function = GreatestFunction;
assert_eq!(
function.return_type(&[$type, $type]).unwrap(),
ConcreteDataType::Timestamp(TimestampType::$unit([<Timestamp $unit Type>]))
);

let columns = vec![
Arc::new([<Timestamp $unit Vector>]::from_slice(vec![-1, 2])) as _,
Arc::new([<Timestamp $unit Vector>]::from_slice(vec![0, 1])) as _,
];

let result = function.eval(FunctionContext::default(), &columns).unwrap();
let result = result.as_any().downcast_ref::<[<Timestamp $unit Vector>]>().unwrap();
assert_eq!(result.len(), 2);
assert_eq!(
result.get(0),
Value::Timestamp(Timestamp::new(0, TimeUnit::$unit))
);
assert_eq!(
result.get(1),
Value::Timestamp(Timestamp::new(2, TimeUnit::$unit))
);
}
}
}
}

test_timestamp!(
ConcreteDataType::timestamp_nanosecond_datatype(),
Nanosecond
);
test_timestamp!(
ConcreteDataType::timestamp_microsecond_datatype(),
Microsecond
);
test_timestamp!(
ConcreteDataType::timestamp_millisecond_datatype(),
Millisecond
);
test_timestamp!(ConcreteDataType::timestamp_second_datatype(), Second);
}
10 changes: 9 additions & 1 deletion tests/cases/standalone/common/function/time.result
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ select GREATEST('1999-01-30', '2023-03-01');
+-------------------------------------------------+
| greatest(Utf8("1999-01-30"),Utf8("2023-03-01")) |
+-------------------------------------------------+
| 2023-03-01 |
| 2023-03-01T00:00:00 |
+-------------------------------------------------+

select GREATEST('2000-02-11'::Date, '2020-12-30'::Date);
Expand All @@ -20,3 +20,11 @@ select GREATEST('2000-02-11'::Date, '2020-12-30'::Date);
| 2020-12-30 |
+-------------------------------------------------+

select GREATEST('2021-07-01 00:00:00'::Timestamp, '2024-07-01 00:00:00'::Timestamp);

+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| greatest(arrow_cast(Utf8("2021-07-01 00:00:00"),Utf8("Timestamp(Millisecond, None)")),arrow_cast(Utf8("2024-07-01 00:00:00"),Utf8("Timestamp(Millisecond, None)"))) |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| 2024-07-01T00:00:00 |
+---------------------------------------------------------------------------------------------------------------------------------------------------------------------+

2 changes: 2 additions & 0 deletions tests/cases/standalone/common/function/time.sql
Original file line number Diff line number Diff line change
Expand Up @@ -5,3 +5,5 @@ select current_time();
select GREATEST('1999-01-30', '2023-03-01');

select GREATEST('2000-02-11'::Date, '2020-12-30'::Date);

select GREATEST('2021-07-01 00:00:00'::Timestamp, '2024-07-01 00:00:00'::Timestamp);

0 comments on commit 0dd02e9

Please sign in to comment.