Skip to content

Commit

Permalink
feat: type conversion between Values (#2394)
Browse files Browse the repository at this point in the history
* feat: add cast() in datatype trait.

* feat: add cast for primitive type

* feat: add unit test cases

* test: add datetime/time cases.

* refactor: time_type cast function.

* chore: typos.

* refactor code.

* feat: add can_cast_type func.

* chore: rename cast to try_cast

* feat: impl cast_with_opt

* chore: pub use cast_with_opt

* chore: add timezone for test

* Update src/common/time/src/date.rs

Co-authored-by: dennis zhuang <[email protected]>

* chore: duration type

---------

Co-authored-by: dennis zhuang <[email protected]>
  • Loading branch information
QuenKar and killme2008 authored Sep 18, 2023
1 parent 342cc0a commit 5805e8d
Show file tree
Hide file tree
Showing 20 changed files with 1,143 additions and 17 deletions.
14 changes: 14 additions & 0 deletions src/common/time/src/date.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ impl Date {
/// Converts this date into a chrono [`NaiveDate`].
///
/// The stored day count is shifted by `UNIX_EPOCH_FROM_CE` before it is
/// handed to chrono. Returns `None` when that shift overflows or when chrono
/// cannot represent the resulting day number.
pub fn to_chrono_date(&self) -> Option<NaiveDate> {
    // A plain `+` would panic in debug builds (and wrap around in release
    // builds) for day counts close to the integer limit; since the caller
    // already has to handle `None`, report the overflow that way instead.
    UNIX_EPOCH_FROM_CE
        .checked_add(self.0)
        .and_then(NaiveDate::from_num_days_from_ce_opt)
}

/// Returns this date expressed in seconds, counting every stored day as
/// `24 * 3600` seconds (so "1970-01-01" maps to `0`).
pub fn to_secs(&self) -> i64 {
    let days = self.0 as i64;
    days * 24 * 3600
}
}

#[cfg(test)]
Expand Down Expand Up @@ -132,4 +136,14 @@ mod tests {
let d: Date = 42.into();
assert_eq!(42, d.val());
}

#[test]
fn test_to_secs() {
    // Each entry pairs a date string with the seconds `to_secs` must report.
    let cases = [
        ("1970-01-01", 0),
        ("1970-01-02", 24 * 3600),
        ("1970-01-03", 2 * 24 * 3600),
    ];
    for (text, expected) in cases {
        let date = Date::from_str(text).unwrap();
        assert_eq!(date.to_secs(), expected);
    }
}
}
8 changes: 7 additions & 1 deletion src/common/time/src/datetime.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,12 @@ impl From<DateTime> for serde_json::Value {
}
}

/// Builds a [`DateTime`] from a chrono [`NaiveDateTime`] by taking the value
/// returned by `NaiveDateTime::timestamp`.
impl From<NaiveDateTime> for DateTime {
    fn from(value: NaiveDateTime) -> Self {
        let secs = value.timestamp();
        DateTime::from(secs)
    }
}

impl FromStr for DateTime {
type Err = Error;

Expand Down Expand Up @@ -88,7 +94,7 @@ impl DateTime {
}

/// Converts this value into a chrono `NaiveDateTime`. The result is an
/// `Option`, so callers have to handle the `None` case.
pub fn to_chrono_datetime(&self) -> Option<NaiveDateTime> {
// NOTE(review): the two expressions below appear to be the before/after
// lines of a diff hunk rather than one function body: the first passes the
// inner value to `from_timestamp_millis`, the second passes it to
// `from_timestamp_opt` together with a nanosecond part of 0. Confirm which
// one is in effect before relying on the unit of the inner value.
NaiveDateTime::from_timestamp_millis(self.0)
NaiveDateTime::from_timestamp_opt(self.0, 0)
}
}

Expand Down
13 changes: 13 additions & 0 deletions src/common/time/src/time.rs
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,19 @@ impl Time {
self.value
}

/// Converts this time into the given `unit`.
///
/// The factors of the current unit and of `unit` are compared. When the
/// current factor is at least the target factor, the value is multiplied by
/// their ratio and `None` is returned if that multiplication overflows.
/// Otherwise the value is divided by the ratio using Euclidean division.
pub fn convert_to(&self, unit: TimeUnit) -> Option<Time> {
    let source_factor = self.unit().factor();
    let target_factor = unit.factor();

    if source_factor < target_factor {
        // Division cannot overflow here, so this path always yields a value.
        let divisor = (target_factor / source_factor) as i64;
        return Some(Time::new(self.value.div_euclid(divisor), unit));
    }

    let multiplier = (source_factor / target_factor) as i64;
    self.value
        .checked_mul(multiplier)
        .map(|scaled| Time::new(scaled, unit))
}

/// Split a [Time] into seconds part and nanoseconds part.
/// Notice the seconds part of split result is always rounded down to floor.
fn split(&self) -> (i64, u32) {
Expand Down
18 changes: 17 additions & 1 deletion src/common/time/src/timestamp.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ use std::str::FromStr;
use std::time::Duration;

use arrow::datatypes::TimeUnit as ArrowTimeUnit;
use chrono::{DateTime, LocalResult, NaiveDateTime, TimeZone as ChronoTimeZone, Utc};
use chrono::{
DateTime, LocalResult, NaiveDate, NaiveDateTime, NaiveTime, TimeZone as ChronoTimeZone, Utc,
};
use serde::{Deserialize, Serialize};
use snafu::{OptionExt, ResultExt};

Expand Down Expand Up @@ -250,11 +252,25 @@ impl Timestamp {
NaiveDateTime::from_timestamp_opt(sec, nsec)
}

/// Returns the date part of this timestamp as a chrono [`NaiveDate`], or
/// `None` when `to_chrono_datetime` yields no value.
pub fn to_chrono_date(&self) -> Option<NaiveDate> {
    let datetime = self.to_chrono_datetime()?;
    Some(datetime.date())
}

/// Returns the time-of-day part of this timestamp as a chrono [`NaiveTime`],
/// or `None` when `to_chrono_datetime` yields no value.
pub fn to_chrono_time(&self) -> Option<NaiveTime> {
    let datetime = self.to_chrono_datetime()?;
    Some(datetime.time())
}

/// Builds a timestamp from a chrono [`NaiveDateTime`].
///
/// The datetime is split into its `timestamp()` value and its sub-second
/// nanoseconds, and both parts are passed to `Timestamp::from_splits`, whose
/// result is returned as is.
pub fn from_chrono_datetime(ndt: NaiveDateTime) -> Option<Self> {
    Timestamp::from_splits(ndt.timestamp(), ndt.timestamp_subsec_nanos())
}

/// Builds a timestamp from a chrono [`NaiveDate`], using the default
/// [`NaiveTime`] as the time of day.
pub fn from_chrono_date(date: NaiveDate) -> Option<Self> {
    let start_of_day = date.and_time(NaiveTime::default());
    Timestamp::from_chrono_datetime(start_of_day)
}
}

impl FromStr for Timestamp {
Expand Down
24 changes: 24 additions & 0 deletions src/datatypes/src/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,10 @@ impl ConcreteDataType {
matches!(self, ConcreteDataType::Boolean(_))
}

/// Returns `true` only for the `String` variant.
pub fn is_string(&self) -> bool {
    matches!(*self, ConcreteDataType::String(_))
}

pub fn is_stringifiable(&self) -> bool {
matches!(
self,
Expand Down Expand Up @@ -159,6 +163,22 @@ impl ConcreteDataType {
)
}

/// Returns `true` when this is one of the integer or floating point variants
/// listed below, and `false` for every other variant.
pub fn is_numeric(&self) -> bool {
    matches!(
        *self,
        // Unsigned integers.
        ConcreteDataType::UInt8(_)
            | ConcreteDataType::UInt16(_)
            | ConcreteDataType::UInt32(_)
            | ConcreteDataType::UInt64(_)
            // Signed integers.
            | ConcreteDataType::Int8(_)
            | ConcreteDataType::Int16(_)
            | ConcreteDataType::Int32(_)
            | ConcreteDataType::Int64(_)
            // Floating point numbers.
            | ConcreteDataType::Float32(_)
            | ConcreteDataType::Float64(_)
    )
}

pub fn numerics() -> Vec<ConcreteDataType> {
vec![
ConcreteDataType::int8_datatype(),
Expand Down Expand Up @@ -456,6 +476,10 @@ pub trait DataType: std::fmt::Debug + Send + Sync {
/// Returns true if the data type is compatible with timestamp type so we can
/// use it as a timestamp.
fn is_timestamp_compatible(&self) -> bool;

/// Tries to cast `from` into a value of this data type.
///
/// Returns `None` if the cast failed, for example when `from` holds a kind
/// of value the implementing type does not handle.
fn try_cast(&self, from: Value) -> Option<Value>;
}

pub type DataTypeRef = Arc<dyn DataType>;
Expand Down
2 changes: 2 additions & 0 deletions src/datatypes/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

mod binary_type;
mod boolean_type;
pub mod cast;
mod date_type;
mod datetime_type;
mod dictionary_type;
Expand All @@ -28,6 +29,7 @@ mod timestamp_type;

pub use binary_type::BinaryType;
pub use boolean_type::BooleanType;
pub use cast::cast_with_opt;
pub use date_type::DateType;
pub use datetime_type::DateTimeType;
pub use dictionary_type::DictionaryType;
Expand Down
8 changes: 8 additions & 0 deletions src/datatypes/src/types/binary_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -57,4 +57,12 @@ impl DataType for BinaryType {
/// Always returns `false`: this type is never reported as timestamp
/// compatible.
fn is_timestamp_compatible(&self) -> bool {
false
}

/// Casts `from` into a binary value.
///
/// A binary value is returned unchanged and a string is converted to the
/// bytes of its `as_utf8()` text; any other kind of value yields `None`.
fn try_cast(&self, from: Value) -> Option<Value> {
    let bytes = match from {
        Value::Binary(existing) => existing,
        Value::String(text) => Bytes::from(text.as_utf8().as_bytes()),
        _ => return None,
    };
    Some(Value::Binary(bytes))
}
}
134 changes: 134 additions & 0 deletions src/datatypes/src/types/boolean_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
use std::sync::Arc;

use arrow::datatypes::DataType as ArrowDataType;
use num_traits::Num;
use serde::{Deserialize, Serialize};

use crate::data_type::{DataType, DataTypeRef};
Expand Down Expand Up @@ -56,4 +57,137 @@ impl DataType for BooleanType {
/// Always returns `false`: this type is never reported as timestamp
/// compatible.
fn is_timestamp_compatible(&self) -> bool {
false
}

/// Casts `from` into a boolean value.
///
/// A boolean is returned unchanged, every integer and float variant goes
/// through [`numeric_to_bool`], and a string is accepted only if
/// `str::parse::<bool>` succeeds on it. Any other kind of value, or a string
/// that does not parse, yields `None`.
fn try_cast(&self, from: Value) -> Option<Value> {
    match from {
        Value::Boolean(flag) => Some(Value::Boolean(flag)),
        // Signed integers.
        Value::Int8(n) => numeric_to_bool(n),
        Value::Int16(n) => numeric_to_bool(n),
        Value::Int32(n) => numeric_to_bool(n),
        Value::Int64(n) => numeric_to_bool(n),
        // Unsigned integers.
        Value::UInt8(n) => numeric_to_bool(n),
        Value::UInt16(n) => numeric_to_bool(n),
        Value::UInt32(n) => numeric_to_bool(n),
        Value::UInt64(n) => numeric_to_bool(n),
        // Floating point numbers.
        Value::Float32(n) => numeric_to_bool(n),
        Value::Float64(n) => numeric_to_bool(n),
        Value::String(text) => {
            let parsed: bool = text.as_utf8().parse().ok()?;
            Some(Value::Boolean(parsed))
        }
        _ => None,
    }
}
}

/// Maps a number to a boolean [`Value`].
///
/// The result is `Value::Boolean(true)` when `num` differs from
/// `T::default()` and `Value::Boolean(false)` otherwise. This function always
/// returns `Some`.
pub fn numeric_to_bool<T>(num: T) -> Option<Value>
where
    T: Num + Default,
{
    let differs_from_default = num != T::default();
    Some(Value::Boolean(differs_from_default))
}

/// Maps a boolean to a number: `true` becomes `T::one()` and `false` becomes
/// `T::zero()`. This function always returns `Some`.
pub fn bool_to_numeric<T>(value: bool) -> Option<T>
where
    T: Num,
{
    let number = if value { T::one() } else { T::zero() };
    Some(number)
}

#[cfg(test)]
mod tests {

use ordered_float::OrderedFloat;

use super::*;
use crate::data_type::ConcreteDataType;

// Casts `$value` to the boolean data type and asserts that the result equals
// `Value::Boolean($expected)`; panics if the cast returns `None`.
macro_rules! test_cast_to_bool {
    ($value: expr, $expected: expr) => {
        let input = $value;
        let target = ConcreteDataType::boolean_datatype();
        let casted = target.try_cast(input).unwrap();
        assert_eq!(casted, Value::Boolean($expected));
    };
}

// Wraps `$value` in `Value::Boolean`, casts it with `$datatype` and asserts
// that the result equals `$expected`; panics if the cast returns `None`.
macro_rules! test_cast_from_bool {
    ($value: expr, $datatype: expr, $expected: expr) => {
        let input = Value::Boolean($value);
        let casted = $datatype.try_cast(input).unwrap();
        assert_eq!(casted, $expected);
    };
}

#[test]
fn test_other_type_cast_to_bool() {
    // Zero of every numeric variant must cast to `false`.
    let zeros = vec![
        Value::UInt8(0),
        Value::UInt16(0),
        Value::UInt32(0),
        Value::UInt64(0),
        Value::Int8(0),
        Value::Int16(0),
        Value::Int32(0),
        Value::Int64(0),
        Value::Float32(OrderedFloat(0.0)),
        Value::Float64(OrderedFloat(0.0)),
    ];
    for zero in zeros {
        test_cast_to_bool!(zero, false);
    }

    // Any non-zero number must cast to `true`.
    let non_zeros = vec![
        Value::UInt8(1),
        Value::UInt16(2),
        Value::UInt32(3),
        Value::UInt64(4),
        Value::Int8(5),
        Value::Int16(6),
        Value::Int32(7),
        Value::Int64(8),
        Value::Float32(OrderedFloat(1.0)),
        Value::Float64(OrderedFloat(2.0)),
    ];
    for non_zero in non_zeros {
        test_cast_to_bool!(non_zero, true);
    }
}

#[test]
fn test_bool_cast_to_other_type() {
    // Each entry is (boolean input, target data type, expected cast result).
    let cases = vec![
        // `false` becomes zero in every numeric type.
        (false, ConcreteDataType::uint8_datatype(), Value::UInt8(0)),
        (false, ConcreteDataType::uint16_datatype(), Value::UInt16(0)),
        (false, ConcreteDataType::uint32_datatype(), Value::UInt32(0)),
        (false, ConcreteDataType::uint64_datatype(), Value::UInt64(0)),
        (false, ConcreteDataType::int8_datatype(), Value::Int8(0)),
        (false, ConcreteDataType::int16_datatype(), Value::Int16(0)),
        (false, ConcreteDataType::int32_datatype(), Value::Int32(0)),
        (false, ConcreteDataType::int64_datatype(), Value::Int64(0)),
        (
            false,
            ConcreteDataType::float32_datatype(),
            Value::Float32(OrderedFloat(0.0)),
        ),
        (
            false,
            ConcreteDataType::float64_datatype(),
            Value::Float64(OrderedFloat(0.0)),
        ),
        // `true` becomes one in every numeric type.
        (true, ConcreteDataType::uint8_datatype(), Value::UInt8(1)),
        (true, ConcreteDataType::uint16_datatype(), Value::UInt16(1)),
        (true, ConcreteDataType::uint32_datatype(), Value::UInt32(1)),
        (true, ConcreteDataType::uint64_datatype(), Value::UInt64(1)),
        (true, ConcreteDataType::int8_datatype(), Value::Int8(1)),
        (true, ConcreteDataType::int16_datatype(), Value::Int16(1)),
        (true, ConcreteDataType::int32_datatype(), Value::Int32(1)),
        (true, ConcreteDataType::int64_datatype(), Value::Int64(1)),
        (
            true,
            ConcreteDataType::float32_datatype(),
            Value::Float32(OrderedFloat(1.0)),
        ),
        (
            true,
            ConcreteDataType::float64_datatype(),
            Value::Float64(OrderedFloat(1.0)),
        ),
    ];

    for (input, datatype, expected) in cases {
        test_cast_from_bool!(input, datatype, expected);
    }
}
}
Loading

0 comments on commit 5805e8d

Please sign in to comment.