Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(expr): date_trunc and +/- on timestamptz during DST jump-back #13201

Merged
merged 1 commit into from
Nov 2, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
129 changes: 129 additions & 0 deletions e2e_test/batch/functions/issue_12072.slt.part
Original file line number Diff line number Diff line change
@@ -0,0 +1,129 @@
# Some exprs on `timestamptz` are implemented by a chain of 3 steps:
# * convert `timestamptz` into naive `timestamp` in the given zone
# * perform actual operation on the naive `timestamp`
# * convert naive `timestamp` in the given zone back to `timestamptz`
#
# This can lead to unintuitive results, because the first and last steps are not perfect inverse.

# Two different `timestamptz` values collapse into the same naive `timestamp`.
query T
select
'2023-11-05 08:40:00Z'::timestamptz AT TIME ZONE 'US/Pacific',
'2023-11-05 09:40:00Z'::timestamptz AT TIME ZONE 'US/Pacific';
----
2023-11-05 01:40:00 2023-11-05 01:40:00

# When ambiguous, the `timestamptz` after a jump-back transition is returned.
query T
select '2023-11-05 01:40:00'::timestamp AT TIME ZONE 'US/Pacific';
----
2023-11-05 09:40:00+00:00

# Tests below are about exprs following the 3-step pattern:
# * `date_trunc(field, timestamptz, zone)` -> `timestamptz`
# * `timestamptz + interval` -> `timestamptz`
# * Also used by `interval + timestamptz` and `timestamptz - interval`

# (A) intuitive cases without anomalies observed (but require special care behind the scenes)

query R
with t(id, v) as (values
('a', '2023-11-05 01:40:00-07:00'::timestamptz),
('b', '2023-11-05 01:40:00-08:00'::timestamptz))
select extract(epoch from date_trunc('hour', v, 'US/Pacific')) from t order by id;
----
1699171200.000000
1699174800.000000

statement ok
set timezone = 'US/Pacific';

query T
with t(id, v) as (values
('a', '2023-11-05 01:40:00-07:00'::timestamptz),
('b', '2023-11-05 01:40:00-08:00'::timestamptz))
select v + interval '5' minute from t order by id;
----
2023-11-05 01:45:00-07:00
2023-11-05 01:45:00-08:00

statement ok
set timezone = 'UTC';

# (B) common 1-hour jump-back for whole-hour zones

query R
with t(id, v) as (values
('a', '2023-11-05 01:40:00-07:00'::timestamptz),
('b', '2023-11-05 01:40:00-08:00'::timestamptz))
select extract(epoch from date_trunc('day', v, 'US/Pacific')) from t order by id;
----
1699167600.000000
1699167600.000000

statement ok
set timezone = 'US/Pacific';

query T
with t(id, v) as (values
('a', '2023-11-05 01:40:00-07:00'::timestamptz),
('b', '2023-11-05 01:40:00-08:00'::timestamptz),
('c', '2023-11-04 01:40:00-07:00'::timestamptz))
select v + interval '1' day from t order by id;
----
2023-11-06 01:40:00-08:00
2023-11-06 01:40:00-08:00
2023-11-05 01:40:00-08:00

statement ok
set timezone = 'UTC';

# (C) 1-hour jump-back for half-hour zones

query T
with t(id, v) as (values
('a', '2023-04-01 15:58:00Z'::timestamptz),
('b', '2023-04-01 16:58:00Z'::timestamptz))
select date_trunc('hour', v, 'Australia/South') from t order by id;
----
2023-04-01 15:30:00+00:00
2023-04-01 16:30:00+00:00

# (D) half-hour jump-back
# Note that `01:45:00+10:30` is truncated to non-existent `01:00:00+10:30` whose canonical form is `01:30:00+11:00`

statement ok
set timezone = 'Australia/Lord_Howe';

query TTT
with t(id, v) as (values
('a', '2023-04-01 14:45:00Z'::timestamptz),
('b', '2023-04-01 15:15:00Z'::timestamptz))
select date_trunc('hour', v, 'Australia/Lord_Howe'), 'truncated from', v from t order by id;
----
2023-04-02 01:00:00+11:00 truncated from 2023-04-02 01:45:00+11:00
2023-04-02 01:30:00+11:00 truncated from 2023-04-02 01:45:00+10:30

statement ok
set timezone = 'UTC';

# (E) jump-back to day boundary, making it ambiguous
# Due to the disambiguation rule selecting post-transition value,
# `date_trunc` effectively returns an instant in the future, rather than the past.

query T
with t(id, v) as (values
('a', '2023-11-05 04:40:00Z'::timestamptz),
('b', '2023-11-05 05:40:00Z'::timestamptz))
select date_trunc('day', v, 'America/Havana') from t order by id;
----
2023-11-05 05:00:00+00:00
2023-11-05 05:00:00+00:00

# (F) jump-forward from day boundary, making it invalid
# Here `2023-09-02 23:59:59-04:00` is followed by `2023-09-03 01:00:00-03:00`.
# There is no `2023-09-03 00:00:00-04:00` or `2023-09-03 00:00:00-03:00`.
# PostgreSQL returns `2023-09-03 01:00:00-03:00` but it is hard using `chrono` crate.

statement error interpret
select date_trunc('day', '2023-09-03 12:00:00Z'::timestamptz, 'America/Santiago');
96 changes: 65 additions & 31 deletions src/expr/impl/src/scalar/date_trunc.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,24 +16,39 @@ use risingwave_common::types::{Interval, Timestamp, Timestamptz};
use risingwave_expr::expr::BoxedExpression;
use risingwave_expr::{build_function, function, ExprError, Result};

use super::timestamptz::{timestamp_at_time_zone, timestamptz_at_time_zone};
use super::timestamptz::timestamp_at_time_zone;

// TODO(xiangjinwu): parse into an enum
const MICROSECONDS: &str = "microseconds";
const MILLISECONDS: &str = "milliseconds";
const SECOND: &str = "second";
const MINUTE: &str = "minute";
const HOUR: &str = "hour";
const DAY: &str = "day";
const WEEK: &str = "week";
const MONTH: &str = "month";
const QUARTER: &str = "quarter";
const YEAR: &str = "year";
const DECADE: &str = "decade";
const CENTURY: &str = "century";
const MILLENNIUM: &str = "millennium";

#[function("date_trunc(varchar, timestamp) -> timestamp")]
pub fn date_trunc_timestamp(field: &str, ts: Timestamp) -> Result<Timestamp> {
Ok(match field.to_ascii_lowercase().as_str() {
"microseconds" => ts.truncate_micros(),
"milliseconds" => ts.truncate_millis(),
"second" => ts.truncate_second(),
"minute" => ts.truncate_minute(),
"hour" => ts.truncate_hour(),
"day" => ts.truncate_day(),
"week" => ts.truncate_week(),
"month" => ts.truncate_month(),
"quarter" => ts.truncate_quarter(),
"year" => ts.truncate_year(),
"decade" => ts.truncate_decade(),
"century" => ts.truncate_century(),
"millennium" => ts.truncate_millennium(),
MICROSECONDS => ts.truncate_micros(),
MILLISECONDS => ts.truncate_millis(),
SECOND => ts.truncate_second(),
MINUTE => ts.truncate_minute(),
HOUR => ts.truncate_hour(),
DAY => ts.truncate_day(),
WEEK => ts.truncate_week(),
MONTH => ts.truncate_month(),
QUARTER => ts.truncate_quarter(),
YEAR => ts.truncate_year(),
DECADE => ts.truncate_decade(),
CENTURY => ts.truncate_century(),
MILLENNIUM => ts.truncate_millennium(),
_ => return Err(invalid_field_error(field)),
})
}
Expand All @@ -52,33 +67,52 @@ fn build_date_trunc_timestamptz_implicit_zone(
#[function("date_trunc(varchar, timestamptz, varchar) -> timestamptz")]
pub fn date_trunc_timestamptz_at_timezone(
field: &str,
ts: Timestamptz,
tsz: Timestamptz,
timezone: &str,
) -> Result<Timestamptz> {
let timestamp = timestamptz_at_time_zone(ts, timezone)?;
let truncated = date_trunc_timestamp(field, timestamp)?;
timestamp_at_time_zone(truncated, timezone)
use chrono::Offset as _;

use super::timestamptz::time_zone_err;

let tz = Timestamptz::lookup_time_zone(timezone).map_err(time_zone_err)?;
let instant_local = tsz.to_datetime_in_zone(tz);

let truncated_naive = date_trunc_timestamp(field, Timestamp(instant_local.naive_local()))?;

match field.to_ascii_lowercase().as_str() {
MICROSECONDS | MILLISECONDS | SECOND | MINUTE | HOUR => {
// When unit < day, follow PostgreSQL to use old timezone offset.
// rather than reinterpret it in the timezone.
// https://github.com/postgres/postgres/blob/REL_16_0/src/backend/utils/adt/timestamp.c#L4270
// See `e2e_test/batch/functions/issue_12072.slt.part` for the difference.
let fixed = instant_local.offset().fix();
// `unwrap` is okay because `FixedOffset` always returns single unique conversion result.
let truncated_local = truncated_naive.0.and_local_timezone(fixed).unwrap();
Ok(Timestamptz::from_micros(truncated_local.timestamp_micros()))
}
_ => timestamp_at_time_zone(truncated_naive, timezone),
}
}

#[function("date_trunc(varchar, interval) -> interval")]
pub fn date_trunc_interval(field: &str, interval: Interval) -> Result<Interval> {
Ok(match field.to_ascii_lowercase().as_str() {
"microseconds" => interval,
"milliseconds" => interval.truncate_millis(),
"second" => interval.truncate_second(),
"minute" => interval.truncate_minute(),
"hour" => interval.truncate_hour(),
"day" => interval.truncate_day(),
"week" => return Err(ExprError::UnsupportedFunction(
MICROSECONDS => interval,
MILLISECONDS => interval.truncate_millis(),
SECOND => interval.truncate_second(),
MINUTE => interval.truncate_minute(),
HOUR => interval.truncate_hour(),
DAY => interval.truncate_day(),
WEEK => return Err(ExprError::UnsupportedFunction(
"interval units \"week\" not supported because months usually have fractional weeks"
.into(),
)),
"month" => interval.truncate_month(),
"quarter" => interval.truncate_quarter(),
"year" => interval.truncate_year(),
"decade" => interval.truncate_decade(),
"century" => interval.truncate_century(),
"millennium" => interval.truncate_millennium(),
MONTH => interval.truncate_month(),
QUARTER => interval.truncate_quarter(),
YEAR => interval.truncate_year(),
DECADE => interval.truncate_decade(),
CENTURY => interval.truncate_century(),
MILLENNIUM => interval.truncate_millennium(),
_ => return Err(invalid_field_error(field)),
})
}
Expand Down
19 changes: 12 additions & 7 deletions src/expr/impl/src/scalar/timestamptz.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,19 +133,24 @@ pub fn timestamptz_interval_add(
interval: Interval,
time_zone: &str,
) -> Result<Timestamptz> {
use num_traits::Zero as _;

// A month may have 28-31 days, a day may have 23 or 25 hours during Daylight Saving switch.
// So their interpretation depends on the local time of a specific zone.
let qualitative = interval.truncate_day();
// Units smaller than `day` are zone agnostic.
let quantitative = interval - qualitative;

// Note the current implementation simply follows the frontend-rewrite chains of exprs before
// refactor. There may be chances to improve.
let t = timestamptz_at_time_zone(input, time_zone)?;
let t = t
.checked_add(qualitative)
.ok_or(ExprError::NumericOverflow)?;
let t = timestamp_at_time_zone(t, time_zone)?;
let mut t = input;
if !qualitative.is_zero() {
// Only convert into and from naive local when necessary because it is lossy.
// See `e2e_test/batch/functions/issue_12072.slt.part` for the difference.
let naive = timestamptz_at_time_zone(t, time_zone)?;
let naive = naive
.checked_add(qualitative)
.ok_or(ExprError::NumericOverflow)?;
t = timestamp_at_time_zone(naive, time_zone)?;
}
let t = timestamptz_interval_quantitative(t, quantitative, i64::checked_add)?;
Ok(t)
}
Expand Down
Loading