Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: refactor and fix to_unixtime #2695

Merged
merged 6 commits into from
Nov 6, 2023
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
192 changes: 67 additions & 125 deletions src/common/function/src/scalars/timestamp/to_unixtime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,9 @@ use std::sync::Arc;

use common_query::error::{InvalidFuncArgsSnafu, Result, UnsupportedInputDataTypeSnafu};
use common_query::prelude::{Signature, Volatility};
use common_time::timestamp::TimeUnit;
use common_time::Timestamp;
use common_time::{Date, Timestamp};
use datatypes::prelude::ConcreteDataType;
use datatypes::types::TimestampType;
use datatypes::vectors::{
Int64Vector, StringVector, TimestampMicrosecondVector, TimestampMillisecondVector,
TimestampNanosecondVector, TimestampSecondVector, Vector, VectorRef,
};
use datatypes::vectors::{Int64Vector, VectorRef};
use snafu::ensure;

use crate::scalars::function::{Function, FunctionContext};
Expand All @@ -37,17 +32,23 @@ const NAME: &str = "to_unixtime";

fn convert_to_seconds(arg: &str) -> Option<i64> {
match Timestamp::from_str(arg) {
Ok(ts) => {
let sec_mul = (TimeUnit::Second.factor() / ts.unit().factor()) as i64;
Some(ts.value().div_euclid(sec_mul))
}
Err(_err) => None,
Ok(ts) => Some(ts.split().0),
Err(_err) => match Date::from_str(arg) {
killme2008 marked this conversation as resolved.
Show resolved Hide resolved
Ok(date) => Some(date.to_secs()),
Err(_) => None,
},
}
killme2008 marked this conversation as resolved.
Show resolved Hide resolved
}

fn process_vector(vector: &dyn Vector) -> Vec<Option<i64>> {
fn convert_timestamps_to_seconds(vector: &VectorRef) -> Vec<Option<i64>> {
(0..vector.len())
.map(|i| vector.get(i).as_timestamp().map(|ts| ts.split().0))
.collect::<Vec<Option<i64>>>()
}

fn convert_dates_to_seconds(vector: &VectorRef) -> Vec<Option<i64>> {
(0..vector.len())
.map(|i| paste::expr!((vector.get(i)).as_timestamp().map(|ts| ts.value())))
.map(|i| vector.get(i).as_date().map(|dt| dt.to_secs()))
.collect::<Vec<Option<i64>>>()
}

Expand All @@ -67,6 +68,7 @@ impl Function for ToUnixtimeFunction {
ConcreteDataType::string_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::date_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
Expand All @@ -87,51 +89,25 @@ impl Function for ToUnixtimeFunction {
}
);

let vector = &columns[0];

match columns[0].data_type() {
ConcreteDataType::String(_) => {
let array = columns[0].to_arrow_array();
let vector = StringVector::try_from_arrow_array(&array).unwrap();
Ok(Arc::new(Int64Vector::from(
(0..vector.len())
.map(|i| convert_to_seconds(&vector.get(i).to_string()))
.collect::<Vec<_>>(),
)))
}
ConcreteDataType::String(_) => Ok(Arc::new(Int64Vector::from(
(0..vector.len())
.map(|i| convert_to_seconds(&vector.get(i).to_string()))
.collect::<Vec<_>>(),
))),
ConcreteDataType::Int64(_) | ConcreteDataType::Int32(_) => {
let array = columns[0].to_arrow_array();
Ok(Arc::new(Int64Vector::try_from_arrow_array(&array).unwrap()))
// Safety: cast always successfully at here
Ok(vector.cast(&ConcreteDataType::int64_datatype()).unwrap())
}
ConcreteDataType::Timestamp(ts) => {
let array = columns[0].to_arrow_array();
let value = match ts {
TimestampType::Second(_) => {
let vector = paste::expr!(TimestampSecondVector::try_from_arrow_array(
array
)
.unwrap());
process_vector(&vector)
}
TimestampType::Millisecond(_) => {
let vector = paste::expr!(
TimestampMillisecondVector::try_from_arrow_array(array).unwrap()
);
process_vector(&vector)
}
TimestampType::Microsecond(_) => {
let vector = paste::expr!(
TimestampMicrosecondVector::try_from_arrow_array(array).unwrap()
);
process_vector(&vector)
}
TimestampType::Nanosecond(_) => {
let vector = paste::expr!(TimestampNanosecondVector::try_from_arrow_array(
array
)
.unwrap());
process_vector(&vector)
}
};
Ok(Arc::new(Int64Vector::from(value)))
ConcreteDataType::Date(_) => {
let seconds = convert_dates_to_seconds(vector);
Ok(Arc::new(Int64Vector::from(seconds)))
}
ConcreteDataType::Timestamp(_) => {
let seconds = convert_timestamps_to_seconds(vector);
Ok(Arc::new(Int64Vector::from(seconds)))
killme2008 marked this conversation as resolved.
Show resolved Hide resolved
}
_ => UnsupportedInputDataTypeSnafu {
function: NAME,
Expand All @@ -151,11 +127,9 @@ impl fmt::Display for ToUnixtimeFunction {
#[cfg(test)]
mod tests {
use common_query::prelude::TypeSignature;
use datatypes::prelude::{ConcreteDataType, ScalarVectorBuilder};
use datatypes::scalars::ScalarVector;
use datatypes::timestamp::TimestampSecond;
use datatypes::prelude::ConcreteDataType;
use datatypes::value::Value;
use datatypes::vectors::{StringVector, TimestampSecondVector};
use datatypes::vectors::{StringVector, TimestampMillisecondVector, TimestampSecondVector};

use super::{ToUnixtimeFunction, *};
use crate::scalars::Function;
Expand All @@ -170,18 +144,19 @@ mod tests {
);

assert!(matches!(f.signature(),
Signature {
type_signature: TypeSignature::Uniform(1, valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_nanosecond_datatype(),
]
Signature {
type_signature: TypeSignature::Uniform(1, valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::date_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_nanosecond_datatype(),
]
));

let times = vec![
Expand Down Expand Up @@ -212,26 +187,6 @@ mod tests {
#[test]
fn test_int_to_unixtime() {
let f = ToUnixtimeFunction;
assert_eq!("to_unixtime", f.name());
assert_eq!(
ConcreteDataType::int64_datatype(),
f.return_type(&[]).unwrap()
);

assert!(matches!(f.signature(),
Signature {
type_signature: TypeSignature::Uniform(1, valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_nanosecond_datatype(),
]
));

let times = vec![Some(3_i64), None, Some(5_i64), None];
let results = [Some(3), None, Some(5), None];
Expand All @@ -256,35 +211,10 @@ mod tests {
#[test]
fn test_timestamp_to_unixtime() {
let f = ToUnixtimeFunction;
assert_eq!("to_unixtime", f.name());
assert_eq!(
ConcreteDataType::int64_datatype(),
f.return_type(&[]).unwrap()
);

assert!(matches!(f.signature(),
Signature {
type_signature: TypeSignature::Uniform(1, valid_types),
volatility: Volatility::Immutable
} if valid_types == vec![
ConcreteDataType::string_datatype(),
ConcreteDataType::int32_datatype(),
ConcreteDataType::int64_datatype(),
ConcreteDataType::timestamp_second_datatype(),
ConcreteDataType::timestamp_millisecond_datatype(),
ConcreteDataType::timestamp_microsecond_datatype(),
ConcreteDataType::timestamp_nanosecond_datatype(),
]
));

let times: Vec<Option<TimestampSecond>> = vec![
Some(TimestampSecond::new(123)),
None,
Some(TimestampSecond::new(42)),
None,
];
let times = vec![Some(123), None, Some(42), None];
let results = [Some(123), None, Some(42), None];
let ts_vector: TimestampSecondVector = build_vector_from_slice(&times);
let ts_vector = TimestampSecondVector::from(times.clone());
let args: Vec<VectorRef> = vec![Arc::new(ts_vector)];
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
Expand All @@ -301,13 +231,25 @@ mod tests {
_ => unreachable!(),
}
}
}

fn build_vector_from_slice<T: ScalarVector>(items: &[Option<T::RefItem<'_>>]) -> T {
let mut builder = T::Builder::with_capacity(items.len());
for item in items {
builder.push(*item);
let times = vec![Some(123000), None, Some(42000), None];
let results = [Some(123), None, Some(42), None];
let ts_vector = TimestampMillisecondVector::from(times.clone());
let args: Vec<VectorRef> = vec![Arc::new(ts_vector)];
let vector = f.eval(FunctionContext::default(), &args).unwrap();
assert_eq!(4, vector.len());
for (i, _t) in times.iter().enumerate() {
let v = vector.get(i);
if i == 1 || i == 3 {
assert_eq!(Value::Null, v);
continue;
}
match v {
Value::Int64(ts) => {
assert_eq!(ts, (*results.get(i).unwrap()).unwrap());
}
_ => unreachable!(),
}
}
builder.finish()
}
}
2 changes: 1 addition & 1 deletion src/common/time/src/timestamp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ impl Timestamp {

/// Split a [Timestamp] into seconds part and nanoseconds part.
/// Notice the seconds part of split result is always rounded down to floor.
fn split(&self) -> (i64, u32) {
pub fn split(&self) -> (i64, u32) {
let sec_mul = (TimeUnit::Second.factor() / self.unit.factor()) as i64;
let nsec_mul = (self.unit.factor() / TimeUnit::Nanosecond.factor()) as i64;

Expand Down
8 changes: 8 additions & 0 deletions src/datatypes/src/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,14 @@ impl Value {
}
}

/// Cast Value to Date. Return None if value is not a valid date data type.
pub fn as_date(&self) -> Option<Date> {
match self {
Value::Date(t) => Some(*t),
_ => None,
}
}

/// Cast Value to [Time]. Return None if value is not a valid time data type.
pub fn as_time(&self) -> Option<Time> {
match self {
Expand Down
42 changes: 25 additions & 17 deletions tests/cases/standalone/common/select/dummy.result
Original file line number Diff line number Diff line change
Expand Up @@ -58,30 +58,38 @@ select TO_UNIXTIME(2);
| 2 |
+-----------------------+

create table test_unixtime(a int, b timestamp time index);
select TO_UNIXTIME('2023-03-01');

+---------------------------------+
| to_unixtime(Utf8("2023-03-01")) |
+---------------------------------+
| 1677628800 |
+---------------------------------+

create table test_unixtime(a int, b timestamp_sec time index);

Affected Rows: 0

DESC TABLE test_unixtime;

+--------+----------------------+-----+------+---------+---------------+
| Column | Type | Key | Null | Default | Semantic Type |
+--------+----------------------+-----+------+---------+---------------+
| a | Int32 | | YES | | FIELD |
| b | TimestampMillisecond | PRI | NO | | TIMESTAMP |
+--------+----------------------+-----+------+---------+---------------+
+--------+-----------------+-----+------+---------+---------------+
| Column | Type | Key | Null | Default | Semantic Type |
+--------+-----------------+-----+------+---------+---------------+
| a | Int32 | | YES | | FIELD |
| b | TimestampSecond | PRI | NO | | TIMESTAMP |
+--------+-----------------+-----+------+---------+---------------+

insert into test_unixtime values(27, 27);

Affected Rows: 1

select * from test_unixtime;

+----+-------------------------+
| a | b |
+----+-------------------------+
| 27 | 1970-01-01T00:00:00.027 |
+----+-------------------------+
+----+---------------------+
| a | b |
+----+---------------------+
| 27 | 1970-01-01T00:00:27 |
+----+---------------------+

select a from test_unixtime;

Expand All @@ -93,11 +101,11 @@ select a from test_unixtime;

select b from test_unixtime;

+-------------------------+
| b |
+-------------------------+
| 1970-01-01T00:00:00.027 |
+-------------------------+
+---------------------+
| b |
+---------------------+
| 1970-01-01T00:00:27 |
+---------------------+

select TO_UNIXTIME(b) from test_unixtime;

Expand Down
4 changes: 3 additions & 1 deletion tests/cases/standalone/common/select/dummy.sql
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ select TO_UNIXTIME(' 2023-03-01T06:35:02Z ');

select TO_UNIXTIME(2);

create table test_unixtime(a int, b timestamp time index);
select TO_UNIXTIME('2023-03-01');

create table test_unixtime(a int, b timestamp_sec time index);

DESC TABLE test_unixtime;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
-- description: Test scanning many big varchar strings with limited memory
CREATE TABLE test (a VARCHAR, ts timestamp time index);
CREATE TABLE test (a VARCHAR, ts timestamp_s time index);

Affected Rows: 0

Expand All @@ -22,7 +22,7 @@ INSERT INTO test SELECT a||a||a||a||a||a||a||a||a||a, to_unixtime(ts) * 7 FROM t
Affected Rows: 1

-- now create a second table, we only insert the big varchar string in there
CREATE TABLE bigtable (a VARCHAR, ts timestamp time index);
CREATE TABLE bigtable (a VARCHAR, ts timestamp_s time index);

Affected Rows: 0

Expand Down
Loading
Loading