From d7ad4fec7c66526c8b4bf8dea8a6d4c27fd23c96 Mon Sep 17 00:00:00 2001 From: Nick Cameron Date: Tue, 20 Aug 2024 07:08:44 +1200 Subject: [PATCH] Implement date_part for durations (#6246) Signed-off-by: Nick Cameron --- arrow-arith/src/temporal.rs | 287 +++++++++++++++++++++++++++++++++++- 1 file changed, 286 insertions(+), 1 deletion(-) diff --git a/arrow-arith/src/temporal.rs b/arrow-arith/src/temporal.rs index 6f8e8cc181c3..5f3eeb325104 100644 --- a/arrow-arith/src/temporal.rs +++ b/arrow-arith/src/temporal.rs @@ -32,7 +32,7 @@ use arrow_array::timezone::Tz; use arrow_array::types::*; use arrow_array::*; use arrow_buffer::ArrowNativeType; -use arrow_schema::{ArrowError, DataType, IntervalUnit}; +use arrow_schema::{ArrowError, DataType, IntervalUnit, TimeUnit}; /// Valid parts to extract from date/time/timestamp arrays. /// @@ -113,6 +113,7 @@ where /// - Time32/Time64 /// - Timestamp /// - Interval +/// - Duration /// /// Returns an [`Int32Array`] unless input was a dictionary type, in which case returns /// the dictionary but with this function applied onto its values. @@ -154,6 +155,26 @@ pub fn date_part(array: &dyn Array, part: DatePart) -> Result { + let array = as_primitive_array::(array).date_part(part)?; + let array = Arc::new(array) as ArrayRef; + Ok(array) + } + DataType::Duration(TimeUnit::Millisecond) => { + let array = as_primitive_array::(array).date_part(part)?; + let array = Arc::new(array) as ArrayRef; + Ok(array) + } + DataType::Duration(TimeUnit::Microsecond) => { + let array = as_primitive_array::(array).date_part(part)?; + let array = Arc::new(array) as ArrayRef; + Ok(array) + } + DataType::Duration(TimeUnit::Nanosecond) => { + let array = as_primitive_array::(array).date_part(part)?; + let array = Arc::new(array) as ArrayRef; + Ok(array) + } DataType::Dictionary(_, _) => { let array = array.as_any_dictionary(); let values = date_part(array.values(), part)?; @@ -482,6 +503,126 @@ impl ExtractDatePartExt for PrimitiveArray { } } +impl ExtractDatePartExt for PrimitiveArray { + fn date_part(&self, part: DatePart) -> Result { + match part { + DatePart::Week => Ok(self.unary_opt(|d| (d / (60 * 60 * 24 * 7)).try_into().ok())), + DatePart::Day => Ok(self.unary_opt(|d| (d / (60 * 60 * 24)).try_into().ok())), + DatePart::Hour => Ok(self.unary_opt(|d| (d / (60 * 60)).try_into().ok())), + DatePart::Minute => Ok(self.unary_opt(|d| (d / 60).try_into().ok())), + DatePart::Second => Ok(self.unary_opt(|d| d.try_into().ok())), + DatePart::Millisecond => { + Ok(self.unary_opt(|d| d.checked_mul(1_000).and_then(|d| d.try_into().ok()))) + } + DatePart::Microsecond => { + Ok(self.unary_opt(|d| d.checked_mul(1_000_000).and_then(|d| d.try_into().ok()))) + } + DatePart::Nanosecond => Ok( + self.unary_opt(|d| d.checked_mul(1_000_000_000).and_then(|d| d.try_into().ok())) + ), + + DatePart::Year + | DatePart::Quarter + | DatePart::Month + | DatePart::DayOfWeekSunday0 + | DatePart::DayOfWeekMonday0 + | DatePart::DayOfYear => { + return_compute_error_with!(format!("{part} does not support"), self.data_type()) + } + } + } +} + +impl ExtractDatePartExt for PrimitiveArray { + fn date_part(&self, part: DatePart) -> Result { + match part { + DatePart::Week => { + Ok(self.unary_opt(|d| (d / (1_000 * 60 * 60 * 24 * 7)).try_into().ok())) + } + DatePart::Day => Ok(self.unary_opt(|d| (d / (1_000 * 60 * 60 * 24)).try_into().ok())), + DatePart::Hour => Ok(self.unary_opt(|d| (d / (1_000 * 60 * 60)).try_into().ok())), + DatePart::Minute => Ok(self.unary_opt(|d| (d / (1_000 * 60)).try_into().ok())), + DatePart::Second => Ok(self.unary_opt(|d| (d / 1_000).try_into().ok())), + DatePart::Millisecond => Ok(self.unary_opt(|d| d.try_into().ok())), + DatePart::Microsecond => { + Ok(self.unary_opt(|d| d.checked_mul(1_000).and_then(|d| d.try_into().ok()))) + } + DatePart::Nanosecond => { + Ok(self.unary_opt(|d| d.checked_mul(1_000_000).and_then(|d| d.try_into().ok()))) + } + + DatePart::Year + | DatePart::Quarter + | DatePart::Month + | DatePart::DayOfWeekSunday0 + | DatePart::DayOfWeekMonday0 + | DatePart::DayOfYear => { + return_compute_error_with!(format!("{part} does not support"), self.data_type()) + } + } + } +} + +impl ExtractDatePartExt for PrimitiveArray { + fn date_part(&self, part: DatePart) -> Result { + match part { + DatePart::Week => { + Ok(self.unary_opt(|d| (d / (1_000_000 * 60 * 60 * 24 * 7)).try_into().ok())) + } + DatePart::Day => { + Ok(self.unary_opt(|d| (d / (1_000_000 * 60 * 60 * 24)).try_into().ok())) + } + DatePart::Hour => Ok(self.unary_opt(|d| (d / (1_000_000 * 60 * 60)).try_into().ok())), + DatePart::Minute => Ok(self.unary_opt(|d| (d / (1_000_000 * 60)).try_into().ok())), + DatePart::Second => Ok(self.unary_opt(|d| (d / 1_000_000).try_into().ok())), + DatePart::Millisecond => Ok(self.unary_opt(|d| (d / 1_000).try_into().ok())), + DatePart::Microsecond => Ok(self.unary_opt(|d| d.try_into().ok())), + DatePart::Nanosecond => { + Ok(self.unary_opt(|d| d.checked_mul(1_000).and_then(|d| d.try_into().ok()))) + } + + DatePart::Year + | DatePart::Quarter + | DatePart::Month + | DatePart::DayOfWeekSunday0 + | DatePart::DayOfWeekMonday0 + | DatePart::DayOfYear => { + return_compute_error_with!(format!("{part} does not support"), self.data_type()) + } + } + } +} + +impl ExtractDatePartExt for PrimitiveArray { + fn date_part(&self, part: DatePart) -> Result { + match part { + DatePart::Week => { + Ok(self.unary_opt(|d| (d / (1_000_000_000 * 60 * 60 * 24 * 7)).try_into().ok())) + } + DatePart::Day => { + Ok(self.unary_opt(|d| (d / (1_000_000_000 * 60 * 60 * 24)).try_into().ok())) + } + DatePart::Hour => { + Ok(self.unary_opt(|d| (d / (1_000_000_000 * 60 * 60)).try_into().ok())) + } + DatePart::Minute => Ok(self.unary_opt(|d| (d / (1_000_000_000 * 60)).try_into().ok())), + DatePart::Second => Ok(self.unary_opt(|d| (d / 1_000_000_000).try_into().ok())), + DatePart::Millisecond => Ok(self.unary_opt(|d| (d / 1_000_000).try_into().ok())), + DatePart::Microsecond => Ok(self.unary_opt(|d| (d / 1_000).try_into().ok())), + DatePart::Nanosecond => Ok(self.unary_opt(|d| d.try_into().ok())), + + DatePart::Year + | DatePart::Quarter + | DatePart::Month + | DatePart::DayOfWeekSunday0 + | DatePart::DayOfWeekMonday0 + | DatePart::DayOfYear => { + return_compute_error_with!(format!("{part} does not support"), self.data_type()) + } + } + } +} + macro_rules! return_compute_error_with { ($msg:expr, $param:expr) => { return { Err(ArrowError::ComputeError(format!("{}: {:?}", $msg, $param))) } @@ -1796,4 +1937,148 @@ mod tests { IntervalMonthDayNano::ZERO, ])); } + + #[test] + fn test_duration_second() { + let input: DurationSecondArray = vec![0, 42, 60 * 60 * 24 + 1].into(); + + let actual = date_part(&input, DatePart::Second).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(42, actual.value(1)); + assert_eq!(60 * 60 * 24 + 1, actual.value(2)); + + let actual = date_part(&input, DatePart::Millisecond).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(42_000, actual.value(1)); + assert_eq!((60 * 60 * 24 + 1) * 1_000, actual.value(2)); + + let actual = date_part(&input, DatePart::Microsecond).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(42_000_000, actual.value(1)); + assert_eq!(0, actual.value(2)); + + let actual = date_part(&input, DatePart::Nanosecond).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(0, actual.value(1)); + assert_eq!(0, actual.value(2)); + } + + #[test] + fn test_duration_millisecond() { + let input: DurationMillisecondArray = vec![0, 42, 60 * 60 * 24 + 1].into(); + + let actual = date_part(&input, DatePart::Second).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(0, actual.value(1)); + assert_eq!((60 * 60 * 24 + 1) / 1_000, actual.value(2)); + + let actual = date_part(&input, DatePart::Millisecond).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(42, actual.value(1)); + assert_eq!(60 * 60 * 24 + 1, actual.value(2)); + + let actual = date_part(&input, DatePart::Microsecond).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(42_000, actual.value(1)); + assert_eq!((60 * 60 * 24 + 1) * 1_000, actual.value(2)); + + let actual = date_part(&input, DatePart::Nanosecond).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(42_000_000, actual.value(1)); + assert_eq!(0, actual.value(2)); + } + + #[test] + fn test_duration_microsecond() { + let input: DurationMicrosecondArray = vec![0, 42, 60 * 60 * 24 + 1].into(); + + let actual = date_part(&input, DatePart::Second).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(0, actual.value(1)); + assert_eq!(0, actual.value(2)); + + let actual = date_part(&input, DatePart::Millisecond).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(0, actual.value(1)); + assert_eq!((60 * 60 * 24 + 1) / 1_000, actual.value(2)); + + let actual = date_part(&input, DatePart::Microsecond).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(42, actual.value(1)); + assert_eq!(60 * 60 * 24 + 1, actual.value(2)); + + let actual = date_part(&input, DatePart::Nanosecond).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(42_000, actual.value(1)); + assert_eq!((60 * 60 * 24 + 1) * 1_000, actual.value(2)); + } + + #[test] + fn test_duration_nanosecond() { + let input: DurationNanosecondArray = vec![0, 42, 60 * 60 * 24 + 1].into(); + + let actual = date_part(&input, DatePart::Second).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(0, actual.value(1)); + assert_eq!(0, actual.value(2)); + + let actual = date_part(&input, DatePart::Millisecond).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(0, actual.value(1)); + assert_eq!(0, actual.value(2)); + + let actual = date_part(&input, DatePart::Microsecond).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(0, actual.value(1)); + assert_eq!((60 * 60 * 24 + 1) / 1_000, actual.value(2)); + + let actual = date_part(&input, DatePart::Nanosecond).unwrap(); + let actual = actual.as_primitive::(); + assert_eq!(0, actual.value(0)); + assert_eq!(42, actual.value(1)); + assert_eq!(60 * 60 * 24 + 1, actual.value(2)); + } + + #[test] + fn test_duration_invalid_parts() { + fn ensure_returns_error(array: &dyn Array) { + let invalid_parts = [ + DatePart::Year, + DatePart::Quarter, + DatePart::Month, + DatePart::DayOfWeekSunday0, + DatePart::DayOfWeekMonday0, + DatePart::DayOfYear, + ]; + + for part in invalid_parts { + let err = date_part(array, part).unwrap_err(); + let expected = format!( + "Compute error: {part} does not support: {}", + array.data_type() + ); + assert_eq!(expected, err.to_string()); + } + } + + ensure_returns_error(&DurationSecondArray::from(vec![0])); + ensure_returns_error(&DurationMillisecondArray::from(vec![0])); + ensure_returns_error(&DurationMicrosecondArray::from(vec![0])); + ensure_returns_error(&DurationNanosecondArray::from(vec![0])); + } }