Skip to content

Commit

Permalink
refactor: not allowed int64 type as time index (#2460)
Browse files Browse the repository at this point in the history
* refactor: remove is_timestamp_compatible.

* chore: fmt

* refactor: remove int64 to timestamp match

* chore

* chore: apply suggestions from code review

Co-authored-by: dennis zhuang <[email protected]>

* chore: fmt

---------

Co-authored-by: dennis zhuang <[email protected]>
  • Loading branch information
QuenKar and killme2008 authored Sep 22, 2023
1 parent c6e95ff commit aef9e7b
Show file tree
Hide file tree
Showing 21 changed files with 19 additions and 135 deletions.
46 changes: 4 additions & 42 deletions src/datatypes/src/data_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,10 @@ impl ConcreteDataType {
)
}

pub fn is_timestamp(&self) -> bool {
matches!(self, ConcreteDataType::Timestamp(_))
}

pub fn numerics() -> Vec<ConcreteDataType> {
vec![
ConcreteDataType::int8_datatype(),
Expand Down Expand Up @@ -217,9 +221,6 @@ impl ConcreteDataType {
/// Try to cast data type as a [`TimestampType`].
pub fn as_timestamp(&self) -> Option<TimestampType> {
match self {
ConcreteDataType::Int64(_) => {
Some(TimestampType::Millisecond(TimestampMillisecondType))
}
ConcreteDataType::Timestamp(t) => Some(*t),
_ => None,
}
Expand Down Expand Up @@ -473,10 +474,6 @@ pub trait DataType: std::fmt::Debug + Send + Sync {
/// Creates a mutable vector with given `capacity` of this type.
fn create_mutable_vector(&self, capacity: usize) -> Box<dyn MutableVector>;

/// Returns true if the data type is compatible with timestamp type so we can
/// use it as a timestamp.
fn is_timestamp_compatible(&self) -> bool;

/// Casts the value to specific DataType.
/// Return None if cast failed.
fn try_cast(&self, from: Value) -> Option<Value>;
Expand Down Expand Up @@ -596,41 +593,6 @@ mod tests {
);
}

#[test]
fn test_is_timestamp_compatible() {
assert!(ConcreteDataType::timestamp_datatype(TimeUnit::Second).is_timestamp_compatible());
assert!(
ConcreteDataType::timestamp_datatype(TimeUnit::Millisecond).is_timestamp_compatible()
);
assert!(
ConcreteDataType::timestamp_datatype(TimeUnit::Microsecond).is_timestamp_compatible()
);
assert!(
ConcreteDataType::timestamp_datatype(TimeUnit::Nanosecond).is_timestamp_compatible()
);
assert!(ConcreteDataType::timestamp_second_datatype().is_timestamp_compatible());
assert!(ConcreteDataType::timestamp_millisecond_datatype().is_timestamp_compatible());
assert!(ConcreteDataType::timestamp_microsecond_datatype().is_timestamp_compatible());
assert!(ConcreteDataType::timestamp_nanosecond_datatype().is_timestamp_compatible());
assert!(ConcreteDataType::int64_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::null_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::binary_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::boolean_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::date_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::datetime_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::string_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::int32_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::uint64_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::time_second_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::time_millisecond_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::time_microsecond_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::time_nanosecond_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::duration_second_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::duration_millisecond_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::duration_microsecond_datatype().is_timestamp_compatible());
assert!(!ConcreteDataType::duration_nanosecond_datatype().is_timestamp_compatible());
}

#[test]
fn test_is_null() {
assert!(ConcreteDataType::null_datatype().is_null());
Expand Down
3 changes: 1 addition & 2 deletions src/datatypes/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ use arrow::datatypes::{Field, Schema as ArrowSchema};
use datafusion_common::DFSchemaRef;
use snafu::{ensure, ResultExt};

use crate::data_type::DataType;
use crate::error::{self, DuplicateColumnSnafu, Error, ProjectArrowSchemaSnafu, Result};
pub use crate::schema::column_schema::{ColumnSchema, Metadata, COMMENT_KEY, TIME_INDEX_KEY};
pub use crate::schema::constraint::ColumnDefaultConstraint;
Expand Down Expand Up @@ -269,7 +268,7 @@ fn validate_timestamp_index(column_schemas: &[ColumnSchema], timestamp_index: us

let column_schema = &column_schemas[timestamp_index];
ensure!(
column_schema.data_type.is_timestamp_compatible(),
column_schema.data_type.is_timestamp(),
error::InvalidTimestampIndexSnafu {
index: timestamp_index,
}
Expand Down
15 changes: 4 additions & 11 deletions src/datatypes/src/schema/constraint.rs
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ impl ColumnDefaultConstraint {
error::UnsupportedDefaultExprSnafu { expr }
);
ensure!(
data_type.is_timestamp_compatible(),
data_type.is_timestamp(),
error::DefaultValueTypeSnafu {
reason: "return value of the function must has timestamp type",
}
Expand Down Expand Up @@ -199,7 +199,7 @@ fn create_current_timestamp_vector(
let current_timestamp_vector = TimestampMillisecondVector::from_values(
std::iter::repeat(util::current_time_millis()).take(num_rows),
);
if data_type.is_timestamp_compatible() {
if data_type.is_timestamp() {
current_timestamp_vector.cast(data_type)
} else {
error::DefaultValueTypeSnafu {
Expand Down Expand Up @@ -350,15 +350,8 @@ mod tests {

// Int64 type.
let data_type = ConcreteDataType::int64_datatype();
let v = constraint
.create_default_vector(&data_type, false, 4)
.unwrap();
assert_eq!(4, v.len());
assert!(
matches!(v.get(0), Value::Int64(_)),
"v {:?} is not timestamp",
v.get(0)
);
let v = constraint.create_default_vector(&data_type, false, 4);
assert!(v.is_err());

let constraint = ColumnDefaultConstraint::Function("no".to_string());
let data_type = ConcreteDataType::timestamp_millisecond_datatype();
Expand Down
4 changes: 0 additions & 4 deletions src/datatypes/src/types/binary_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,6 @@ impl DataType for BinaryType {
Box::new(BinaryVectorBuilder::with_capacity(capacity))
}

fn is_timestamp_compatible(&self) -> bool {
false
}

fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::Binary(v) => Some(Value::Binary(v)),
Expand Down
4 changes: 0 additions & 4 deletions src/datatypes/src/types/boolean_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,6 @@ impl DataType for BooleanType {
Box::new(BooleanVectorBuilder::with_capacity(capacity))
}

fn is_timestamp_compatible(&self) -> bool {
false
}

fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::Boolean(v) => Some(Value::Boolean(v)),
Expand Down
4 changes: 0 additions & 4 deletions src/datatypes/src/types/date_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,6 @@ impl DataType for DateType {
Box::new(DateVectorBuilder::with_capacity(capacity))
}

fn is_timestamp_compatible(&self) -> bool {
false
}

fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::Int32(v) => Some(Value::Date(Date::from(v))),
Expand Down
4 changes: 0 additions & 4 deletions src/datatypes/src/types/datetime_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,6 @@ impl DataType for DateTimeType {
Box::new(DateTimeVectorBuilder::with_capacity(capacity))
}

fn is_timestamp_compatible(&self) -> bool {
false
}

fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::Int64(v) => Some(Value::DateTime(DateTime::from(v))),
Expand Down
4 changes: 0 additions & 4 deletions src/datatypes/src/types/dictionary_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,10 +85,6 @@ impl DataType for DictionaryType {
unimplemented!()
}

fn is_timestamp_compatible(&self) -> bool {
false
}

fn try_cast(&self, _: Value) -> Option<Value> {
None
}
Expand Down
3 changes: 0 additions & 3 deletions src/datatypes/src/types/duration_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -98,9 +98,6 @@ macro_rules! impl_data_type_for_duration {
Box::new([<Duration $unit Vector Builder>]::with_capacity(capacity))
}

fn is_timestamp_compatible(&self) -> bool {
false
}

fn try_cast(&self, _: Value) -> Option<Value> {
// TODO(QuenKar): Implement casting for duration types.
Expand Down
3 changes: 0 additions & 3 deletions src/datatypes/src/types/interval_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,6 @@ macro_rules! impl_data_type_for_interval {
Box::new([<Interval $unit Vector Builder>]::with_capacity(capacity))
}

fn is_timestamp_compatible(&self) -> bool {
false
}

fn try_cast(&self, _: Value) -> Option<Value> {
// TODO(QuenKar): Implement casting for interval types.
Expand Down
4 changes: 0 additions & 4 deletions src/datatypes/src/types/list_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -76,10 +76,6 @@ impl DataType for ListType {
))
}

fn is_timestamp_compatible(&self) -> bool {
false
}

fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::List(v) => Some(Value::List(v)),
Expand Down
4 changes: 0 additions & 4 deletions src/datatypes/src/types/null_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -52,10 +52,6 @@ impl DataType for NullType {
Box::<NullVectorBuilder>::default()
}

fn is_timestamp_compatible(&self) -> bool {
false
}

// Unconditional cast other type to Value::Null
fn try_cast(&self, _from: Value) -> Option<Value> {
Some(Value::Null)
Expand Down
11 changes: 0 additions & 11 deletions src/datatypes/src/types/primitive_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -271,9 +271,6 @@ macro_rules! define_non_timestamp_primitive {
Box::new(PrimitiveVectorBuilder::<$DataType>::with_capacity(capacity))
}

fn is_timestamp_compatible(&self) -> bool {
false
}

fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Expand Down Expand Up @@ -373,10 +370,6 @@ impl DataType for Int64Type {
Box::new(PrimitiveVectorBuilder::<Int64Type>::with_capacity(capacity))
}

fn is_timestamp_compatible(&self) -> bool {
true
}

fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::Boolean(v) => bool_to_numeric(v).map(Value::Int64),
Expand Down Expand Up @@ -424,10 +417,6 @@ impl DataType for Int32Type {
Box::new(PrimitiveVectorBuilder::<Int32Type>::with_capacity(capacity))
}

fn is_timestamp_compatible(&self) -> bool {
false
}

fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::Boolean(v) => bool_to_numeric(v).map(Value::Int32),
Expand Down
4 changes: 0 additions & 4 deletions src/datatypes/src/types/string_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -54,10 +54,6 @@ impl DataType for StringType {
Box::new(StringVectorBuilder::with_capacity(capacity))
}

fn is_timestamp_compatible(&self) -> bool {
false
}

fn try_cast(&self, from: Value) -> Option<Value> {
if from.logical_type_id() == self.logical_type_id() {
return Some(from);
Expand Down
4 changes: 0 additions & 4 deletions src/datatypes/src/types/time_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -112,10 +112,6 @@ macro_rules! impl_data_type_for_time {
Box::new([<Time $unit Vector Builder>]::with_capacity(capacity))
}

fn is_timestamp_compatible(&self) -> bool {
false
}

fn try_cast(&self, from: Value) -> Option<Value> {
match from {
Value::$TargetType(v) => Some(Value::Time(Time::new(v as i64, TimeUnit::$unit))),
Expand Down
4 changes: 0 additions & 4 deletions src/datatypes/src/types/timestamp_type.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,6 @@ macro_rules! impl_data_type_for_timestamp {
Box::new([<Timestamp $unit Vector Builder>]::with_capacity(capacity))
}

fn is_timestamp_compatible(&self) -> bool {
true
}

fn try_cast(&self, from: Value)-> Option<Value>{
match from {
Value::Timestamp(v) => v.convert_to(TimeUnit::$unit).map(Value::Timestamp),
Expand Down
2 changes: 0 additions & 2 deletions src/datatypes/src/value.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,6 @@ impl Value {
/// Cast Value to timestamp. Return None if value is not a valid timestamp data type.
pub fn as_timestamp(&self) -> Option<Timestamp> {
match self {
Value::Int64(v) => Some(Timestamp::new_millisecond(*v)),
Value::Timestamp(t) => Some(*t),
_ => None,
}
Expand Down Expand Up @@ -388,7 +387,6 @@ pub fn duration_to_scalar_value(unit: TimeUnit, val: Option<i64>) -> ScalarValue
/// Return `None` if given scalar value cannot be converted to a valid timestamp.
pub fn scalar_value_to_timestamp(scalar: &ScalarValue) -> Option<Timestamp> {
match scalar {
ScalarValue::Int64(val) => val.map(Timestamp::new_millisecond),
ScalarValue::Utf8(Some(s)) => match Timestamp::from_str(s) {
Ok(t) => Some(t),
Err(e) => {
Expand Down
8 changes: 3 additions & 5 deletions src/mito2/src/read.rs
Original file line number Diff line number Diff line change
Expand Up @@ -382,9 +382,7 @@ impl Batch {
fn get_timestamp(&self, index: usize) -> Timestamp {
match self.timestamps.get_ref(index) {
ValueRef::Timestamp(timestamp) => timestamp,
// Int64 is always millisecond.
// TODO(yingwen): Don't allow using int64 as time index.
ValueRef::Int64(v) => Timestamp::new_millisecond(v),

// We have check the data type is timestamp compatible in the [BatchBuilder] so it's safe to panic.
value => panic!("{:?} is not a timestamp", value),
}
Expand Down Expand Up @@ -483,9 +481,9 @@ impl BatchBuilder {
pub fn timestamps_array(&mut self, array: ArrayRef) -> Result<&mut Self> {
let vector = Helper::try_into_vector(array).context(ConvertVectorSnafu)?;
ensure!(
vector.data_type().is_timestamp_compatible(),
vector.data_type().is_timestamp(),
InvalidBatchSnafu {
reason: format!("{:?} is a timestamp type", vector.data_type()),
reason: format!("{:?} is not a timestamp type", vector.data_type()),
}
);

Expand Down
3 changes: 1 addition & 2 deletions src/storage/src/sst/parquet.rs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ use async_stream::try_stream;
use async_trait::async_trait;
use common_telemetry::{debug, error};
use common_time::range::TimestampRange;
use common_time::timestamp::TimeUnit;
use common_time::Timestamp;
use datatypes::arrow::record_batch::RecordBatch;
use datatypes::prelude::ConcreteDataType;
Expand Down Expand Up @@ -162,7 +161,6 @@ fn decode_timestamp_range_inner(
let mut end = i64::MIN;

let unit = match ts_datatype {
ConcreteDataType::Int64(_) => TimeUnit::Millisecond,
ConcreteDataType::Timestamp(type_) => type_.unit(),
_ => {
return DecodeParquetTimeRangeSnafu {
Expand Down Expand Up @@ -358,6 +356,7 @@ mod tests {
use api::v1::OpType;
use common_base::readable_size::ReadableSize;
use common_test_util::temp_dir::create_temp_dir;
use common_time::timestamp::TimeUnit;
use datatypes::arrow::array::{Array, UInt64Array, UInt8Array};
use datatypes::prelude::{ScalarVector, Vector};
use datatypes::types::{TimestampMillisecondType, TimestampType};
Expand Down
9 changes: 2 additions & 7 deletions src/storage/src/sst/pruning.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,10 @@
use std::sync::Arc;

use arrow::array::{
PrimitiveArray, TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
TimestampMicrosecondArray, TimestampMillisecondArray, TimestampNanosecondArray,
TimestampSecondArray,
};
use arrow::datatypes::{DataType, Int64Type};
use arrow::datatypes::DataType;
use arrow::error::ArrowError;
use arrow_array::{Array, BooleanArray, RecordBatch};
use common_time::range::TimestampRange;
Expand All @@ -45,7 +45,6 @@ pub(crate) fn build_row_filter(
let ts_col_idx = store_schema.timestamp_index();
let ts_col = store_schema.columns().get(ts_col_idx)?;
let ts_col_unit = match &ts_col.desc.data_type {
ConcreteDataType::Int64(_) => TimeUnit::Millisecond,
ConcreteDataType::Timestamp(ts_type) => ts_type.unit(),
_ => unreachable!(),
};
Expand Down Expand Up @@ -205,7 +204,6 @@ impl ArrowPredicate for FastTimestampRowFilter {
downcast_and_compute!(TimestampNanosecondArray)
}
},
DataType::Int64 => downcast_and_compute!(PrimitiveArray<Int64Type>),
_ => {
unreachable!()
}
Expand Down Expand Up @@ -270,9 +268,6 @@ impl ArrowPredicate for PlainTimestampRowFilter {
downcast_and_compute!(TimestampNanosecondArray, Nanosecond)
}
},
DataType::Int64 => {
downcast_and_compute!(PrimitiveArray<Int64Type>, Millisecond)
}
_ => {
unreachable!()
}
Expand Down
Loading

0 comments on commit aef9e7b

Please sign in to comment.