Skip to content

Commit

Permalink
fix(expr): date_trunc and +/- on timestamptz during DST jump-back
Browse files Browse the repository at this point in the history
  • Loading branch information
xiangjinwu committed Nov 1, 2023
1 parent 6bd797e commit 88be4d2
Show file tree
Hide file tree
Showing 3 changed files with 206 additions and 38 deletions.
129 changes: 129 additions & 0 deletions e2e_test/batch/functions/issue_12072.slt.part
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# Some exprs on `timestamptz` are implemented by a chain of 3 steps:
# * convert `timestamptz` into naive `timestamp` in the given zone
# * perform actual operation on the naive `timestamp`
# * convert naive `timestamp` in the given zone back to `timestamptz`
#
# This can lead to unintuitive results, because the first and last steps are not perfect inverse.

# Two different `timestamptz` values collapse into the same naive `timestamp`.
query T
select
'2023-11-05 08:40:00Z'::timestamptz AT TIME ZONE 'US/Pacific',
'2023-11-05 09:40:00Z'::timestamptz AT TIME ZONE 'US/Pacific';
----
2023-11-05 01:40:00 2023-11-05 01:40:00

# When ambiguous, the `timestamptz` after a jump-back transition is returned.
query T
select '2023-11-05 01:40:00'::timestamp AT TIME ZONE 'US/Pacific';
----
2023-11-05 09:40:00+00:00

# Tests below are about exprs following the 3-step pattern:
# * `date_trunc(field, timestamptz, zone)` -> `timestamptz`
# * `timestamptz + interval` -> `timestamptz`
# * Also used by `interval + timestamptz` and `timestamptz - interval`

# (A) intuitive cases without anomalies observed (but require special care behind the scenes)

query R
with t(id, v) as (values
('a', '2023-11-05 01:40:00-07:00'::timestamptz),
('b', '2023-11-05 01:40:00-08:00'::timestamptz))
select extract(epoch from date_trunc('hour', v, 'US/Pacific')) from t order by id;
----
1699171200.000000
1699174800.000000

statement ok
set timezone = 'US/Pacific';

query T
with t(id, v) as (values
('a', '2023-11-05 01:40:00-07:00'::timestamptz),
('b', '2023-11-05 01:40:00-08:00'::timestamptz))
select v + interval '5' minute from t order by id;
----
2023-11-05 01:45:00-07:00
2023-11-05 01:45:00-08:00

statement ok
set timezone = 'UTC';

# (B) common 1-hour jump-back for whole-hour zones

query R
with t(id, v) as (values
('a', '2023-11-05 01:40:00-07:00'::timestamptz),
('b', '2023-11-05 01:40:00-08:00'::timestamptz))
select extract(epoch from date_trunc('day', v, 'US/Pacific')) from t order by id;
----
1699167600.000000
1699167600.000000

statement ok
set timezone = 'US/Pacific';

query T
with t(id, v) as (values
('a', '2023-11-05 01:40:00-07:00'::timestamptz),
('b', '2023-11-05 01:40:00-08:00'::timestamptz),
('c', '2023-11-04 01:40:00-07:00'::timestamptz))
select v + interval '1' day from t order by id;
----
2023-11-06 01:40:00-08:00
2023-11-06 01:40:00-08:00
2023-11-05 01:40:00-08:00

statement ok
set timezone = 'UTC';

# (C) 1-hour jump-back for half-hour zones

query T
with t(id, v) as (values
('a', '2023-04-01 15:58:00Z'::timestamptz),
('b', '2023-04-01 16:58:00Z'::timestamptz))
select date_trunc('hour', v, 'Australia/South') from t order by id;
----
2023-04-01 15:30:00+00:00
2023-04-01 16:30:00+00:00

# (D) half-hour jump-back
# Note that `01:45:00+10:30` is truncated to non-existent `01:00:00+10:30` whose canonical form is `01:30:00+11:00`

statement ok
set timezone = 'Australia/Lord_Howe';

query TTT
with t(id, v) as (values
('a', '2023-04-01 14:45:00Z'::timestamptz),
('b', '2023-04-01 15:15:00Z'::timestamptz))
select date_trunc('hour', v, 'Australia/Lord_Howe'), 'truncated from', v from t order by id;
----
2023-04-02 01:00:00+11:00 truncated from 2023-04-02 01:45:00+11:00
2023-04-02 01:30:00+11:00 truncated from 2023-04-02 01:45:00+10:30

statement ok
set timezone = 'UTC';

# (E) jump-back to day boundary, making it ambiguous
# Due to the disambiguation rule selecting post-transition value,
# `date_trunc` effectively returns an instant in the future, rather than the past.

query T
with t(id, v) as (values
('a', '2023-11-05 04:40:00Z'::timestamptz),
('b', '2023-11-05 05:40:00Z'::timestamptz))
select date_trunc('day', v, 'America/Havana') from t order by id;
----
2023-11-05 05:00:00+00:00
2023-11-05 05:00:00+00:00

# (F) jump-forward from day boundary, making it invalid
# Here `2023-09-02 23:59:59-04:00` is followed by `2023-09-03 01:00:00-03:00`.
# There is no `2023-09-03 00:00:00-04:00` or `2023-09-03 00:00:00-03:00`.
# PostgreSQL returns `2023-09-03 01:00:00-03:00` but it is hard using `chrono` crate.

statement error interpret
select date_trunc('day', '2023-09-03 12:00:00Z'::timestamptz, 'America/Santiago');
96 changes: 65 additions & 31 deletions src/expr/impl/src/scalar/date_trunc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,39 @@ use risingwave_common::types::{Interval, Timestamp, Timestamptz};
use risingwave_expr::expr::BoxedExpression;
use risingwave_expr::{build_function, function, ExprError, Result};

use super::timestamptz::{timestamp_at_time_zone, timestamptz_at_time_zone};
use super::timestamptz::timestamp_at_time_zone;

// TODO(xiangjinwu): parse into an enum
const MICROSECONDS: &str = "microseconds";
const MILLISECONDS: &str = "milliseconds";
const SECOND: &str = "second";
const MINUTE: &str = "minute";
const HOUR: &str = "hour";
const DAY: &str = "day";
const WEEK: &str = "week";
const MONTH: &str = "month";
const QUARTER: &str = "quarter";
const YEAR: &str = "year";
const DECADE: &str = "decade";
const CENTURY: &str = "century";
const MILLENNIUM: &str = "millennium";

#[function("date_trunc(varchar, timestamp) -> timestamp")]
pub fn date_trunc_timestamp(field: &str, ts: Timestamp) -> Result<Timestamp> {
Ok(match field.to_ascii_lowercase().as_str() {
"microseconds" => ts.truncate_micros(),
"milliseconds" => ts.truncate_millis(),
"second" => ts.truncate_second(),
"minute" => ts.truncate_minute(),
"hour" => ts.truncate_hour(),
"day" => ts.truncate_day(),
"week" => ts.truncate_week(),
"month" => ts.truncate_month(),
"quarter" => ts.truncate_quarter(),
"year" => ts.truncate_year(),
"decade" => ts.truncate_decade(),
"century" => ts.truncate_century(),
"millennium" => ts.truncate_millennium(),
MICROSECONDS => ts.truncate_micros(),
MILLISECONDS => ts.truncate_millis(),
SECOND => ts.truncate_second(),
MINUTE => ts.truncate_minute(),
HOUR => ts.truncate_hour(),
DAY => ts.truncate_day(),
WEEK => ts.truncate_week(),
MONTH => ts.truncate_month(),
QUARTER => ts.truncate_quarter(),
YEAR => ts.truncate_year(),
DECADE => ts.truncate_decade(),
CENTURY => ts.truncate_century(),
MILLENNIUM => ts.truncate_millennium(),
_ => return Err(invalid_field_error(field)),
})
}
Expand All @@ -52,33 +67,52 @@ fn build_date_trunc_timestamptz_implicit_zone(
#[function("date_trunc(varchar, timestamptz, varchar) -> timestamptz")]
pub fn date_trunc_timestamptz_at_timezone(
field: &str,
ts: Timestamptz,
tsz: Timestamptz,
timezone: &str,
) -> Result<Timestamptz> {
let timestamp = timestamptz_at_time_zone(ts, timezone)?;
let truncated = date_trunc_timestamp(field, timestamp)?;
timestamp_at_time_zone(truncated, timezone)
use chrono::Offset as _;

use super::timestamptz::time_zone_err;

let tz = Timestamptz::lookup_time_zone(timezone).map_err(time_zone_err)?;
let instant_local = tsz.to_datetime_in_zone(tz);

let truncated_naive = date_trunc_timestamp(field, Timestamp(instant_local.naive_local()))?;

match field.to_ascii_lowercase().as_str() {
MICROSECONDS | MILLISECONDS | SECOND | MINUTE | HOUR => {
// When unit < day, follow PostgreSQL to use old timezone offset.
// rather than reinterpret it in the timezone.
// https://github.com/postgres/postgres/blob/REL_16_0/src/backend/utils/adt/timestamp.c#L4270
// See `e2e_test/batch/functions/issue_12072.slt.part` for the difference.
let fixed = instant_local.offset().fix();
// `unwrap` is okay because `FixedOffset` always returns single unique conversion result.
let truncated_local = truncated_naive.0.and_local_timezone(fixed).unwrap();
Ok(Timestamptz::from_micros(truncated_local.timestamp_micros()))
}
_ => timestamp_at_time_zone(truncated_naive, timezone),
}
}

#[function("date_trunc(varchar, interval) -> interval")]
pub fn date_trunc_interval(field: &str, interval: Interval) -> Result<Interval> {
Ok(match field.to_ascii_lowercase().as_str() {
"microseconds" => interval,
"milliseconds" => interval.truncate_millis(),
"second" => interval.truncate_second(),
"minute" => interval.truncate_minute(),
"hour" => interval.truncate_hour(),
"day" => interval.truncate_day(),
"week" => return Err(ExprError::UnsupportedFunction(
MICROSECONDS => interval,
MILLISECONDS => interval.truncate_millis(),
SECOND => interval.truncate_second(),
MINUTE => interval.truncate_minute(),
HOUR => interval.truncate_hour(),
DAY => interval.truncate_day(),
WEEK => return Err(ExprError::UnsupportedFunction(
"interval units \"week\" not supported because months usually have fractional weeks"
.into(),
)),
"month" => interval.truncate_month(),
"quarter" => interval.truncate_quarter(),
"year" => interval.truncate_year(),
"decade" => interval.truncate_decade(),
"century" => interval.truncate_century(),
"millennium" => interval.truncate_millennium(),
MONTH => interval.truncate_month(),
QUARTER => interval.truncate_quarter(),
YEAR => interval.truncate_year(),
DECADE => interval.truncate_decade(),
CENTURY => interval.truncate_century(),
MILLENNIUM => interval.truncate_millennium(),
_ => return Err(invalid_field_error(field)),
})
}
Expand Down
19 changes: 12 additions & 7 deletions src/expr/impl/src/scalar/timestamptz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,19 +133,24 @@ pub fn timestamptz_interval_add(
interval: Interval,
time_zone: &str,
) -> Result<Timestamptz> {
use num_traits::Zero as _;

// A month may have 28-31 days, a day may have 23 or 25 hours during Daylight Saving switch.
// So their interpretation depends on the local time of a specific zone.
let qualitative = interval.truncate_day();
// Units smaller than `day` are zone agnostic.
let quantitative = interval - qualitative;

// Note the current implementation simply follows the frontend-rewrite chains of exprs before
// refactor. There may be chances to improve.
let t = timestamptz_at_time_zone(input, time_zone)?;
let t = t
.checked_add(qualitative)
.ok_or(ExprError::NumericOverflow)?;
let t = timestamp_at_time_zone(t, time_zone)?;
let mut t = input;
if !qualitative.is_zero() {
// Only convert into and from naive local when necessary because it is lossy.
// See `e2e_test/batch/functions/issue_12072.slt.part` for the difference.
let naive = timestamptz_at_time_zone(t, time_zone)?;
let naive = naive
.checked_add(qualitative)
.ok_or(ExprError::NumericOverflow)?;
t = timestamp_at_time_zone(naive, time_zone)?;
}
let t = timestamptz_interval_quantitative(t, quantitative, i64::checked_add)?;
Ok(t)
}
Expand Down

0 comments on commit 88be4d2

Please sign in to comment.