From aef9e7bfc3f72e95279fc0e665c6b124b404ba90 Mon Sep 17 00:00:00 2001 From: Wei <47681251+QuenKar@users.noreply.github.com> Date: Fri, 22 Sep 2023 14:28:02 +0800 Subject: [PATCH] refactor: not allowed int64 type as time index (#2460) * refactor: remove is_timestamp_compatible. * chore: fmt * refactor: remove int64 to timestamp match * chore * chore: apply suggestions from code review Co-authored-by: dennis zhuang * chore: fmt --------- Co-authored-by: dennis zhuang --- src/datatypes/src/data_type.rs | 46 ++-------------------- src/datatypes/src/schema.rs | 3 +- src/datatypes/src/schema/constraint.rs | 15 ++----- src/datatypes/src/types/binary_type.rs | 4 -- src/datatypes/src/types/boolean_type.rs | 4 -- src/datatypes/src/types/date_type.rs | 4 -- src/datatypes/src/types/datetime_type.rs | 4 -- src/datatypes/src/types/dictionary_type.rs | 4 -- src/datatypes/src/types/duration_type.rs | 3 -- src/datatypes/src/types/interval_type.rs | 3 -- src/datatypes/src/types/list_type.rs | 4 -- src/datatypes/src/types/null_type.rs | 4 -- src/datatypes/src/types/primitive_type.rs | 11 ------ src/datatypes/src/types/string_type.rs | 4 -- src/datatypes/src/types/time_type.rs | 4 -- src/datatypes/src/types/timestamp_type.rs | 4 -- src/datatypes/src/value.rs | 2 - src/mito2/src/read.rs | 8 ++-- src/storage/src/sst/parquet.rs | 3 +- src/storage/src/sst/pruning.rs | 9 +---- src/store-api/src/metadata.rs | 11 ++---- 21 files changed, 19 insertions(+), 135 deletions(-) diff --git a/src/datatypes/src/data_type.rs b/src/datatypes/src/data_type.rs index 541bc6b8b3b2..938c8ba498dc 100644 --- a/src/datatypes/src/data_type.rs +++ b/src/datatypes/src/data_type.rs @@ -179,6 +179,10 @@ impl ConcreteDataType { ) } + pub fn is_timestamp(&self) -> bool { + matches!(self, ConcreteDataType::Timestamp(_)) + } + pub fn numerics() -> Vec { vec![ ConcreteDataType::int8_datatype(), @@ -217,9 +221,6 @@ impl ConcreteDataType { /// Try to cast data type as a [`TimestampType`]. pub fn as_timestamp(&self) -> Option { match self { - ConcreteDataType::Int64(_) => { - Some(TimestampType::Millisecond(TimestampMillisecondType)) - } ConcreteDataType::Timestamp(t) => Some(*t), _ => None, } @@ -473,10 +474,6 @@ pub trait DataType: std::fmt::Debug + Send + Sync { /// Creates a mutable vector with given `capacity` of this type. fn create_mutable_vector(&self, capacity: usize) -> Box; - /// Returns true if the data type is compatible with timestamp type so we can - /// use it as a timestamp. - fn is_timestamp_compatible(&self) -> bool; - /// Casts the value to specific DataType. /// Return None if cast failed. fn try_cast(&self, from: Value) -> Option; @@ -596,41 +593,6 @@ mod tests { ); } - #[test] - fn test_is_timestamp_compatible() { - assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Second).is_timestamp_compatible()); - assert!( - ConcreteDataType::timestamp_datatype(TimeUnit::Millisecond).is_timestamp_compatible() - ); - assert!( - ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond).is_timestamp_compatible() - ); - assert!( - ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond).is_timestamp_compatible() - ); - assert!(ConcreteDataType::timestamp_second_datatype().is_timestamp_compatible()); - assert!(ConcreteDataType::timestamp_millisecond_datatype().is_timestamp_compatible()); - assert!(ConcreteDataType::timestamp_microsecond_datatype().is_timestamp_compatible()); - assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_timestamp_compatible()); - assert!(ConcreteDataType::int64_datatype().is_timestamp_compatible()); - assert!(!ConcreteDataType::null_datatype().is_timestamp_compatible()); - assert!(!ConcreteDataType::binary_datatype().is_timestamp_compatible()); - assert!(!ConcreteDataType::boolean_datatype().is_timestamp_compatible()); - assert!(!ConcreteDataType::date_datatype().is_timestamp_compatible()); - assert!(!ConcreteDataType::datetime_datatype().is_timestamp_compatible()); - assert!(!ConcreteDataType::string_datatype().is_timestamp_compatible()); - assert!(!ConcreteDataType::int32_datatype().is_timestamp_compatible()); - assert!(!ConcreteDataType::uint64_datatype().is_timestamp_compatible()); - assert!(!ConcreteDataType::time_second_datatype().is_timestamp_compatible()); - assert!(!ConcreteDataType::time_millisecond_datatype().is_timestamp_compatible()); - assert!(!ConcreteDataType::time_microsecond_datatype().is_timestamp_compatible()); - assert!(!ConcreteDataType::time_nanosecond_datatype().is_timestamp_compatible()); - assert!(!ConcreteDataType::duration_second_datatype().is_timestamp_compatible()); - assert!(!ConcreteDataType::duration_millisecond_datatype().is_timestamp_compatible()); - assert!(!ConcreteDataType::duration_microsecond_datatype().is_timestamp_compatible()); - assert!(!ConcreteDataType::duration_nanosecond_datatype().is_timestamp_compatible()); - } - #[test] fn test_is_null() { assert!(ConcreteDataType::null_datatype().is_null()); diff --git a/src/datatypes/src/schema.rs b/src/datatypes/src/schema.rs index fd6da64beb29..b506dd12ea00 100644 --- a/src/datatypes/src/schema.rs +++ b/src/datatypes/src/schema.rs @@ -23,7 +23,6 @@ use arrow::datatypes::{Field, Schema as ArrowSchema}; use datafusion_common::DFSchemaRef; use snafu::{ensure, ResultExt}; -use crate::data_type::DataType; use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result}; pub use crate::schema::column_schema::{ColumnSchema, Metadata, COMMENT_KEY, TIME_INDEX_KEY}; pub use crate::schema::constraint::ColumnDefaultConstraint; @@ -269,7 +268,7 @@ fn validate_timestamp_index(column_schemas: &[ColumnSchema], timestamp_index: us let column_schema = &column_schemas[timestamp_index]; ensure!( - column_schema.data_type.is_timestamp_compatible(), + column_schema.data_type.is_timestamp(), error::InvalidTimestampIndexSnafu { index: timestamp_index, } diff --git a/src/datatypes/src/schema/constraint.rs b/src/datatypes/src/schema/constraint.rs index b00a8d7dcaf7..e0dacbd5f719 100644 --- a/src/datatypes/src/schema/constraint.rs +++ b/src/datatypes/src/schema/constraint.rs @@ -81,7 +81,7 @@ impl ColumnDefaultConstraint { error::UnsupportedDefaultExprSnafu { expr } ); ensure!( - data_type.is_timestamp_compatible(), + data_type.is_timestamp(), error::DefaultValueTypeSnafu { reason: "return value of the function must has timestamp type", } @@ -199,7 +199,7 @@ fn create_current_timestamp_vector( let current_timestamp_vector = TimestampMillisecondVector::from_values( std::iter::repeat(util::current_time_millis()).take(num_rows), ); - if data_type.is_timestamp_compatible() { + if data_type.is_timestamp() { current_timestamp_vector.cast(data_type) } else { error::DefaultValueTypeSnafu { @@ -350,15 +350,8 @@ mod tests { // Int64 type. let data_type = ConcreteDataType::int64_datatype(); - let v = constraint - .create_default_vector(&data_type, false, 4) - .unwrap(); - assert_eq!(4, v.len()); - assert!( - matches!(v.get(0), Value::Int64(_)), - "v {:?} is not timestamp", - v.get(0) - ); + let v = constraint.create_default_vector(&data_type, false, 4); + assert!(v.is_err()); let constraint = ColumnDefaultConstraint::Function("no".to_string()); let data_type = ConcreteDataType::timestamp_millisecond_datatype(); diff --git a/src/datatypes/src/types/binary_type.rs b/src/datatypes/src/types/binary_type.rs index c9e8d7f12b6e..6f4c1f6bc0b5 100644 --- a/src/datatypes/src/types/binary_type.rs +++ b/src/datatypes/src/types/binary_type.rs @@ -54,10 +54,6 @@ impl DataType for BinaryType { Box::new(BinaryVectorBuilder::with_capacity(capacity)) } - fn is_timestamp_compatible(&self) -> bool { - false - } - fn try_cast(&self, from: Value) -> Option { match from { Value::Binary(v) => Some(Value::Binary(v)), diff --git a/src/datatypes/src/types/boolean_type.rs b/src/datatypes/src/types/boolean_type.rs index df33d3862ce2..1d4b9e80a2b9 100644 --- a/src/datatypes/src/types/boolean_type.rs +++ b/src/datatypes/src/types/boolean_type.rs @@ -54,10 +54,6 @@ impl DataType for BooleanType { Box::new(BooleanVectorBuilder::with_capacity(capacity)) } - fn is_timestamp_compatible(&self) -> bool { - false - } - fn try_cast(&self, from: Value) -> Option { match from { Value::Boolean(v) => Some(Value::Boolean(v)), diff --git a/src/datatypes/src/types/date_type.rs b/src/datatypes/src/types/date_type.rs index 1bc243da3a60..8bbde3a7c7f0 100644 --- a/src/datatypes/src/types/date_type.rs +++ b/src/datatypes/src/types/date_type.rs @@ -52,10 +52,6 @@ impl DataType for DateType { Box::new(DateVectorBuilder::with_capacity(capacity)) } - fn is_timestamp_compatible(&self) -> bool { - false - } - fn try_cast(&self, from: Value) -> Option { match from { Value::Int32(v) => Some(Value::Date(Date::from(v))), diff --git a/src/datatypes/src/types/datetime_type.rs b/src/datatypes/src/types/datetime_type.rs index 76b432e82584..cd0e5a3cd1bf 100644 --- a/src/datatypes/src/types/datetime_type.rs +++ b/src/datatypes/src/types/datetime_type.rs @@ -50,10 +50,6 @@ impl DataType for DateTimeType { Box::new(DateTimeVectorBuilder::with_capacity(capacity)) } - fn is_timestamp_compatible(&self) -> bool { - false - } - fn try_cast(&self, from: Value) -> Option { match from { Value::Int64(v) => Some(Value::DateTime(DateTime::from(v))), diff --git a/src/datatypes/src/types/dictionary_type.rs b/src/datatypes/src/types/dictionary_type.rs index fdbdd85ac1c0..cc29c41403df 100644 --- a/src/datatypes/src/types/dictionary_type.rs +++ b/src/datatypes/src/types/dictionary_type.rs @@ -85,10 +85,6 @@ impl DataType for DictionaryType { unimplemented!() } - fn is_timestamp_compatible(&self) -> bool { - false - } - fn try_cast(&self, _: Value) -> Option { None } diff --git a/src/datatypes/src/types/duration_type.rs b/src/datatypes/src/types/duration_type.rs index 94c80a7962b3..ffc8fe92467b 100644 --- a/src/datatypes/src/types/duration_type.rs +++ b/src/datatypes/src/types/duration_type.rs @@ -98,9 +98,6 @@ macro_rules! impl_data_type_for_duration { Box::new([]::with_capacity(capacity)) } - fn is_timestamp_compatible(&self) -> bool { - false - } fn try_cast(&self, _: Value) -> Option { // TODO(QuenKar): Implement casting for duration types. diff --git a/src/datatypes/src/types/interval_type.rs b/src/datatypes/src/types/interval_type.rs index b87df2733717..1acc506cfce0 100644 --- a/src/datatypes/src/types/interval_type.rs +++ b/src/datatypes/src/types/interval_type.rs @@ -86,9 +86,6 @@ macro_rules! impl_data_type_for_interval { Box::new([]::with_capacity(capacity)) } - fn is_timestamp_compatible(&self) -> bool { - false - } fn try_cast(&self, _: Value) -> Option { // TODO(QuenKar): Implement casting for interval types. diff --git a/src/datatypes/src/types/list_type.rs b/src/datatypes/src/types/list_type.rs index 4b4769ed3862..37d620620297 100644 --- a/src/datatypes/src/types/list_type.rs +++ b/src/datatypes/src/types/list_type.rs @@ -76,10 +76,6 @@ impl DataType for ListType { )) } - fn is_timestamp_compatible(&self) -> bool { - false - } - fn try_cast(&self, from: Value) -> Option { match from { Value::List(v) => Some(Value::List(v)), diff --git a/src/datatypes/src/types/null_type.rs b/src/datatypes/src/types/null_type.rs index e69cdae24985..04c44c38c573 100644 --- a/src/datatypes/src/types/null_type.rs +++ b/src/datatypes/src/types/null_type.rs @@ -52,10 +52,6 @@ impl DataType for NullType { Box::::default() } - fn is_timestamp_compatible(&self) -> bool { - false - } - // Unconditional cast other type to Value::Null fn try_cast(&self, _from: Value) -> Option { Some(Value::Null) diff --git a/src/datatypes/src/types/primitive_type.rs b/src/datatypes/src/types/primitive_type.rs index 7bf90c964a3c..52e1bd30a7c6 100644 --- a/src/datatypes/src/types/primitive_type.rs +++ b/src/datatypes/src/types/primitive_type.rs @@ -271,9 +271,6 @@ macro_rules! define_non_timestamp_primitive { Box::new(PrimitiveVectorBuilder::<$DataType>::with_capacity(capacity)) } - fn is_timestamp_compatible(&self) -> bool { - false - } fn try_cast(&self, from: Value) -> Option { match from { @@ -373,10 +370,6 @@ impl DataType for Int64Type { Box::new(PrimitiveVectorBuilder::::with_capacity(capacity)) } - fn is_timestamp_compatible(&self) -> bool { - true - } - fn try_cast(&self, from: Value) -> Option { match from { Value::Boolean(v) => bool_to_numeric(v).map(Value::Int64), @@ -424,10 +417,6 @@ impl DataType for Int32Type { Box::new(PrimitiveVectorBuilder::::with_capacity(capacity)) } - fn is_timestamp_compatible(&self) -> bool { - false - } - fn try_cast(&self, from: Value) -> Option { match from { Value::Boolean(v) => bool_to_numeric(v).map(Value::Int32), diff --git a/src/datatypes/src/types/string_type.rs b/src/datatypes/src/types/string_type.rs index 85a970f116d6..febff36324a1 100644 --- a/src/datatypes/src/types/string_type.rs +++ b/src/datatypes/src/types/string_type.rs @@ -54,10 +54,6 @@ impl DataType for StringType { Box::new(StringVectorBuilder::with_capacity(capacity)) } - fn is_timestamp_compatible(&self) -> bool { - false - } - fn try_cast(&self, from: Value) -> Option { if from.logical_type_id() == self.logical_type_id() { return Some(from); diff --git a/src/datatypes/src/types/time_type.rs b/src/datatypes/src/types/time_type.rs index aaa9fec914e7..a8d48a7f586d 100644 --- a/src/datatypes/src/types/time_type.rs +++ b/src/datatypes/src/types/time_type.rs @@ -112,10 +112,6 @@ macro_rules! impl_data_type_for_time { Box::new([