Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(expr): support interval to_char() #14071

Merged
merged 7 commits into from
Dec 21, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
88 changes: 88 additions & 0 deletions e2e_test/batch/functions/to_char.slt.part
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
statement ok
SET RW_IMPLICIT_FLUSH TO true;

query T
SELECT to_char(timestamp '2002-04-20 17:31:12.66', 'HH12:MI:SS')
----
Expand Down Expand Up @@ -66,3 +69,88 @@ select to_char(tsz, 'YYYY-MM-DD HH24:MI:SS TZH:TZM') from t order by tsz;

statement ok
drop table t;


query T
select to_char('-20459year -256 days -120hours 866seconds'::interval, 'YYYY IYYY YY IY MM DD PM pm HH HH12 HH24 MI SS');
----
-20459 -20460 -59 -60 00 -256 AM am -11 -11 -119 -45 -34

query T
select to_char('0year -256 days -120hours'::interval, 'YYYY IYYY YY IY MM DD PM pm HH HH12 HH24 MI SS');
----
0000 -001 00 -1 00 -256 AM am 012 012 -120 00 00

query T
select to_char('0year 0 days 0hours'::interval, 'YYYY IYYY YY IY MM DD PM pm HH12 HH24 MI SS');
----
0000 -001 00 -1 00 00 AM am 12 00 00 00

query T
select to_char('1year 1month 1day 1hours 1minute 1second'::interval, 'YYYY IYYY YY IY MM DD PM pm HH12 HH24 MI SS MS US');
----
0001 0001 01 01 01 01 AM am 01 01 01 01 000 000000

query T
select to_char('-1year -1month -1day -1hours -1minute -1second'::interval, 'YYYY IYYY YY IY MM DD PM pm HH12 HH24 MI SS MS US');
----
-0001 -0002 -01 -02 -01 -1 AM am -01 -01 -01 -01 000 000000

query T
select to_char('23:22:57.124562'::interval, 'HH12 MI SS MS US');
----
11 22 57 124 124562

query T
select to_char('-23:22:57.124562'::interval, 'HH12 MI SS MS US');
----
-11 -22 -57 -124 -124562

query error
select to_char('1year 1month 1day 1hours 1minute 1second'::interval, 'IY MM DD AM HH12 MM SS tzhtzm');
----
db error: ERROR: Failed to run the query

Caused by these errors (recent errors listed first):
1: Expr error
2: Invalid parameter pattern: invalid format specification for an interval value, HINT: Intervals are not tied to specific calendar dates.


query error
select to_char('1year 1month 1day 1hours 1minute 1second'::interval, 'IY MM DD AM HH12 MI SS TZH:TZM');
----
db error: ERROR: Failed to run the query

Caused by these errors (recent errors listed first):
1: Expr error
2: Invalid parameter pattern: invalid format specification for an interval value, HINT: Intervals are not tied to specific calendar dates.


query error
select to_char('1year 1month 1day 1hours 1minute 1second'::interval, 'IY MM DD AM HH12 MI SS TZH');
----
db error: ERROR: Failed to run the query

Caused by these errors (recent errors listed first):
1: Expr error
2: Invalid parameter pattern: invalid format specification for an interval value, HINT: Intervals are not tied to specific calendar dates.


query error
select to_char('1year 1month 1day 1hours 1minute 1second'::interval, 'IY MM DD AM HH12 MI SS Month');
----
db error: ERROR: Failed to run the query

Caused by these errors (recent errors listed first):
1: Expr error
2: Invalid parameter pattern: invalid format specification for an interval value, HINT: Intervals are not tied to specific calendar dates.


query error
select to_char('1year 1month 1day 1hours 1minute 1second'::interval, 'IY MM DD AM HH12 MI SS Mon');
----
db error: ERROR: Failed to run the query

Caused by these errors (recent errors listed first):
1: Expr error
2: Invalid parameter pattern: invalid format specification for an interval value, HINT: Intervals are not tied to specific calendar dates.
270 changes: 266 additions & 4 deletions src/expr/impl/src/scalar/to_char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,24 @@ use std::fmt::{Debug, Write};
use std::sync::LazyLock;

use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
use chrono::format::StrftimeItems;
use risingwave_common::types::{Timestamp, Timestamptz};
use chrono::format::{Item, StrftimeItems};
use chrono::{Datelike, NaiveDate};
use risingwave_common::types::{Interval, Timestamp, Timestamptz};
use risingwave_expr::{function, ExprError, Result};

use super::timestamptz::time_zone_err;
use crate::scalar::arithmetic_op::timestamp_interval_add;

type Pattern<'a> = Vec<chrono::format::Item<'a>>;

/// Builds the error reported when a `to_char` template contains a specifier that cannot be
/// rendered for an interval value.
#[inline(always)]
fn invalid_pattern_err() -> ExprError {
    const REASON: &str = "invalid format specification for an interval value, HINT: Intervals are not tied to specific calendar dates.";
    ExprError::InvalidParam {
        name: "pattern",
        reason: REASON.into(),
    }
}

self_cell::self_cell! {
pub struct ChronoPattern {
owner: String,
Expand Down Expand Up @@ -97,10 +107,73 @@ impl ChronoPattern {
.expect("failed to build an Aho-Corasick automaton")
});

ChronoPattern::compile_inner(tmpl, PATTERNS, &AC)
}

/// Compiles a PostgreSQL-style `to_char` template into a [`ChronoPattern`] using the keyword
/// table for interval values.
///
/// The table also lists keywords that the interval formatter rejects (month names, time zone
/// offsets) so that they are recognised as specifiers instead of being copied through as
/// literal text.
pub fn compile_for_interval(tmpl: &str) -> ChronoPattern {
    // Translation table: PostgreSQL keyword -> chrono strftime specifier.
    // PostgreSQL keywords: https://www.postgresql.org/docs/current/functions-formatting.html
    // chrono specifiers: https://docs.rs/chrono/latest/chrono/format/strftime/index.html
    const PG_TO_CHRONO: &[(&str, &str)] = &[
        ("HH24", "%H"),
        ("hh24", "%H"),
        ("HH12", "%I"),
        ("hh12", "%I"),
        ("HH", "%I"),
        ("hh", "%I"),
        ("AM", "%p"),
        ("PM", "%p"),
        ("am", "%P"),
        ("pm", "%P"),
        ("MI", "%M"),
        ("mi", "%M"),
        ("SS", "%S"),
        ("ss", "%S"),
        ("YYYY", "%Y"),
        ("yyyy", "%Y"),
        ("YY", "%y"),
        ("yy", "%y"),
        ("IYYY", "%G"),
        ("iyyy", "%G"),
        ("IY", "%g"),
        ("iy", "%g"),
        ("MM", "%m"),
        ("mm", "%m"),
        ("Month", "%B"),
        ("Mon", "%b"),
        ("DD", "%d"),
        ("dd", "%d"),
        // "%6f" and "%3f" are converted to private data structures in chrono, so "%.6f" and
        // "%.3f" are used instead.
        ("US", "%.6f"),
        ("us", "%.6f"),
        ("MS", "%.3f"),
        ("ms", "%.3f"),
        ("TZH:TZM", "%:z"),
        ("tzh:tzm", "%:z"),
        ("TZHTZM", "%z"),
        ("tzhtzm", "%z"),
        ("TZH", "%#z"),
        ("tzh", "%#z"),
    ];
    // The Aho-Corasick matcher over the keywords is built lazily, once, and then reused.
    static MATCHER: LazyLock<AhoCorasick> = LazyLock::new(|| {
        let mut builder = AhoCorasickBuilder::new();
        builder
            .ascii_case_insensitive(false)
            .match_kind(aho_corasick::MatchKind::LeftmostLongest);
        builder
            .build(PG_TO_CHRONO.iter().map(|(keyword, _)| keyword))
            .expect("failed to build an Aho-Corasick automaton")
    });
    ChronoPattern::compile_inner(tmpl, PG_TO_CHRONO, &MATCHER)
}

fn compile_inner(
tmpl: &str,
patterns: &[(&str, &str)],
ac: &LazyLock<AhoCorasick>,
) -> ChronoPattern {
// replace all pg patterns with chrono patterns
let mut chrono_tmpl = String::new();
AC.replace_all_with(tmpl, &mut chrono_tmpl, |mat, _, dst| {
dst.push_str(PATTERNS[mat.pattern()].1);
ac.replace_all_with(tmpl, &mut chrono_tmpl, |mat, _, dst| {
dst.push_str(patterns[mat.pattern()].1);
true
});
tracing::debug!(tmpl, chrono_tmpl, "compile_pattern_to_chrono");
Expand Down Expand Up @@ -138,3 +211,192 @@ fn timestamptz_to_char3(
write!(writer, "{}", format).unwrap();
Ok(())
}

/// Formats `interval` according to the precompiled `pattern`, appending the text to `writer`.
///
/// Items are rendered in order; the first item that cannot be rendered aborts the call with
/// its error (text written for earlier items stays in `writer`).
#[function(
    "to_char(interval, varchar) -> varchar",
    prebuild = "ChronoPattern::compile_for_interval($1)"
)]
fn interval_to_char(
    interval: Interval,
    pattern: &ChronoPattern,
    writer: &mut impl Write,
) -> Result<()> {
    pattern
        .borrow_dependent()
        .iter()
        .try_for_each(|item| format_inner(&mut *writer, interval, item))
}

/// Returns the ISO week-numbering year of the timestamp obtained by adding the months and days
/// of `interval` (its microsecond part is replaced by zero) to `0000-01-01 00:00:00`.
///
/// Any error from `timestamp_interval_add` is propagated to the caller.
fn adjust_to_iso_year(interval: Interval) -> Result<i32> {
    // Keep only the month and day components of the interval.
    let calendar_part = Interval::from_month_day_usec(interval.months(), interval.days(), 0);
    let origin = NaiveDate::from_ymd_opt(0, 1, 1)
        .and_then(|day| day.and_hms_opt(0, 0, 0))
        .unwrap();
    let shifted = timestamp_interval_add(
        risingwave_common::types::Timestamp(origin),
        calendar_part,
    )?;
    Ok(shifted.0.iso_week().year())
}

/// Writes `value` zero-padded to `digits` columns.
///
/// When `negative` is set, the value is written with an explicit sign and the padded width
/// grows by one so that the sign does not take up a digit column.
///
/// # Panics
///
/// Panics if writing to `w` fails.
fn write_signed_field(
    w: &mut impl Write,
    value: impl std::fmt::Display,
    negative: bool,
    digits: usize,
) {
    if negative {
        write!(w, "{:+0width$}", value, width = digits + 1).unwrap();
    } else {
        write!(w, "{:0width$}", value, width = digits).unwrap();
    }
}

/// Renders one compiled pattern `item` for `interval` into `w`.
///
/// Literal and whitespace items are copied through unchanged. Numeric and fixed items are
/// rendered from the corresponding interval field; negative fields carry a leading `-`.
///
/// # Errors
///
/// Returns the "invalid format specification for an interval value" error for every item that
/// has no interval rendering (month names, time zone offsets, weekday/week/ordinal fields,
/// chrono parse errors, ...), and propagates any error from `adjust_to_iso_year`.
///
/// # Panics
///
/// Panics if writing to `w` fails.
fn format_inner(w: &mut impl Write, interval: Interval, item: &Item<'_>) -> Result<()> {
    match *item {
        Item::Literal(s) | Item::Space(s) => {
            w.write_str(s).unwrap();
            Ok(())
        }
        Item::OwnedLiteral(ref s) | Item::OwnedSpace(ref s) => {
            w.write_str(s).unwrap();
            Ok(())
        }
        Item::Numeric(ref spec, _) => {
            use chrono::format::Numeric::*;
            match *spec {
                Year => {
                    let year = interval.years_field();
                    write_signed_field(w, year, year < 0, 4);
                }
                YearMod100 => {
                    // `%` truncates toward zero, so the remainder keeps the sign of the year.
                    let year = interval.years_field() % 100;
                    write_signed_field(w, year, year < 0, 2);
                }
                IsoYear => {
                    let iso_year = adjust_to_iso_year(interval)?;
                    // The signed layout is selected by the sign of the years field, not by
                    // the sign of the ISO year itself.
                    write_signed_field(w, iso_year, interval.years_field() < 0, 4);
                }
                IsoYearMod100 => {
                    let iso_year = adjust_to_iso_year(interval)? % 100;
                    write_signed_field(w, iso_year, interval.years_field() % 100 < 0, 2);
                }
                Month => {
                    let month = interval.months_field();
                    write_signed_field(w, month, month < 0, 2);
                }
                Day => {
                    let day = interval.days_field();
                    // Unlike the other fields, negative days are not widened for the sign:
                    // `-1` is written as `-1`, not `-01`.
                    if day < 0 {
                        write!(w, "{:+02}", day).unwrap();
                    } else {
                        write!(w, "{:02}", day).unwrap();
                    }
                }
                Hour => {
                    let hour = interval.hours_field();
                    write_signed_field(w, hour, hour < 0, 2);
                }
                Hour12 => {
                    let hour = interval.hours_field();
                    let hour12 = hour % 12;
                    if hour < 0 {
                        // here to align with postgres, we format -0 as 012.
                        if hour12 == 0 {
                            w.write_str("012").unwrap();
                        } else {
                            write_signed_field(w, hour12, true, 2);
                        }
                    } else {
                        // On a 12-hour clock, a zero remainder is shown as 12.
                        let shown = if hour12 == 0 { 12 } else { hour12 };
                        write_signed_field(w, shown, false, 2);
                    }
                }
                Minute => {
                    let minute = interval.usecs() / 1_000_000 / 60 % 60;
                    write_signed_field(w, minute, minute < 0, 2);
                }
                Second => {
                    let second = interval.usecs() / 1_000_000 % 60;
                    write_signed_field(w, second, second < 0, 2);
                }
                // None of these is produced by the PostgreSQL keyword table.
                // NOTE(review): a raw `%` sequence in the user template (e.g. `%j`) looks like
                // it is handed to chrono's strftime parser unchanged, so these may still be
                // reachable from user input -- confirm against `compile_inner`. Report an
                // error instead of panicking.
                Nanosecond | Ordinal | WeekdayFromMon | NumDaysFromSun | IsoWeek | WeekFromSun
                | WeekFromMon | IsoYearDiv100 | Timestamp | YearDiv100 | Internal(_) => {
                    return Err(invalid_pattern_err());
                }
            }
            Ok(())
        }
        Item::Fixed(ref spec) => {
            use chrono::format::Fixed::*;
            match *spec {
                LowerAmPm => {
                    if interval.hours_field() % 24 >= 12 {
                        w.write_str("pm").unwrap();
                    } else {
                        w.write_str("am").unwrap();
                    }
                    Ok(())
                }
                UpperAmPm => {
                    if interval.hours_field() % 24 >= 12 {
                        w.write_str("PM").unwrap();
                    } else {
                        w.write_str("AM").unwrap();
                    }
                    Ok(())
                }
                Nanosecond3 => {
                    // Sub-second part in milliseconds.
                    let usec = interval.usecs() % 1_000_000;
                    write!(w, "{:03}", usec / 1000).unwrap();
                    Ok(())
                }
                Nanosecond6 => {
                    // Sub-second part in microseconds.
                    let usec = interval.usecs() % 1_000_000;
                    write!(w, "{:06}", usec).unwrap();
                    Ok(())
                }
                // Specifiers without an interval rendering. The second group is not produced
                // by the PostgreSQL keyword table, but is rejected with the same error rather
                // than a panic (see the note on the numeric specifiers above).
                Internal(_) | ShortMonthName | LongMonthName | TimezoneOffset | TimezoneOffsetZ
                | TimezoneOffsetColon => Err(invalid_pattern_err()),
                ShortWeekdayName
                | LongWeekdayName
                | TimezoneName
                | TimezoneOffsetDoubleColon
                | TimezoneOffsetTripleColon
                | TimezoneOffsetColonZ
                | Nanosecond
                | Nanosecond9
                | RFC2822
                | RFC3339 => Err(invalid_pattern_err()),
            }
        }
        Item::Error => Err(invalid_pattern_err()),
    }
}
Loading