Skip to content

Commit

Permalink
Merge branch 'release-1.5' into wrj/cherry-pick-14090
Browse files Browse the repository at this point in the history
  • Loading branch information
huangjw806 authored Dec 25, 2023
2 parents 9a98cb4 + 24ae3bc commit c6410f1
Show file tree
Hide file tree
Showing 9 changed files with 522 additions and 73 deletions.
88 changes: 88 additions & 0 deletions e2e_test/batch/functions/to_char.slt.part
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
statement ok
SET RW_IMPLICIT_FLUSH TO true;

query T
SELECT to_char(timestamp '2002-04-20 17:31:12.66', 'HH12:MI:SS')
----
Expand Down Expand Up @@ -66,3 +69,88 @@ select to_char(tsz, 'YYYY-MM-DD HH24:MI:SS TZH:TZM') from t order by tsz;

statement ok
drop table t;


query T
select to_char('-20459year -256 days -120hours 866seconds'::interval, 'YYYY IYYY YY IY MM DD PM pm HH HH12 HH24 MI SS');
----
-20459 -20460 -59 -60 00 -256 AM am -11 -11 -119 -45 -34

query T
select to_char('0year -256 days -120hours'::interval, 'YYYY IYYY YY IY MM DD PM pm HH HH12 HH24 MI SS');
----
0000 -001 00 -1 00 -256 AM am 012 012 -120 00 00

query T
select to_char('0year 0 days 0hours'::interval, 'YYYY IYYY YY IY MM DD PM pm HH12 HH24 MI SS');
----
0000 -001 00 -1 00 00 AM am 12 00 00 00

query T
select to_char('1year 1month 1day 1hours 1minute 1second'::interval, 'YYYY IYYY YY IY MM DD PM pm HH12 HH24 MI SS MS US');
----
0001 0001 01 01 01 01 AM am 01 01 01 01 000 000000

query T
select to_char('-1year -1month -1day -1hours -1minute -1second'::interval, 'YYYY IYYY YY IY MM DD PM pm HH12 HH24 MI SS MS US');
----
-0001 -0002 -01 -02 -01 -1 AM am -01 -01 -01 -01 000 000000

query T
select to_char('23:22:57.124562'::interval, 'HH12 MI SS MS US');
----
11 22 57 124 124562

query T
select to_char('-23:22:57.124562'::interval, 'HH12 MI SS MS US');
----
-11 -22 -57 -124 -124562

query error
select to_char('1year 1month 1day 1hours 1minute 1second'::interval, 'IY MM DD AM HH12 MM SS tzhtzm');
----
db error: ERROR: Failed to run the query

Caused by these errors (recent errors listed first):
1: Expr error
2: Invalid parameter pattern: invalid format specification for an interval value, HINT: Intervals are not tied to specific calendar dates.


query error
select to_char('1year 1month 1day 1hours 1minute 1second'::interval, 'IY MM DD AM HH12 MI SS TZH:TZM');
----
db error: ERROR: Failed to run the query

Caused by these errors (recent errors listed first):
1: Expr error
2: Invalid parameter pattern: invalid format specification for an interval value, HINT: Intervals are not tied to specific calendar dates.


query error
select to_char('1year 1month 1day 1hours 1minute 1second'::interval, 'IY MM DD AM HH12 MI SS TZH');
----
db error: ERROR: Failed to run the query

Caused by these errors (recent errors listed first):
1: Expr error
2: Invalid parameter pattern: invalid format specification for an interval value, HINT: Intervals are not tied to specific calendar dates.


query error
select to_char('1year 1month 1day 1hours 1minute 1second'::interval, 'IY MM DD AM HH12 MI SS Month');
----
db error: ERROR: Failed to run the query

Caused by these errors (recent errors listed first):
1: Expr error
2: Invalid parameter pattern: invalid format specification for an interval value, HINT: Intervals are not tied to specific calendar dates.


query error
select to_char('1year 1month 1day 1hours 1minute 1second'::interval, 'IY MM DD AM HH12 MI SS Mon');
----
db error: ERROR: Failed to run the query

Caused by these errors (recent errors listed first):
1: Expr error
2: Invalid parameter pattern: invalid format specification for an interval value, HINT: Intervals are not tied to specific calendar dates.
270 changes: 266 additions & 4 deletions src/expr/impl/src/scalar/to_char.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,14 +16,24 @@ use std::fmt::{Debug, Write};
use std::sync::LazyLock;

use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
use chrono::format::StrftimeItems;
use risingwave_common::types::{Timestamp, Timestamptz};
use chrono::format::{Item, StrftimeItems};
use chrono::{Datelike, NaiveDate};
use risingwave_common::types::{Interval, Timestamp, Timestamptz};
use risingwave_expr::{function, ExprError, Result};

use super::timestamptz::time_zone_err;
use crate::scalar::arithmetic_op::timestamp_interval_add;

type Pattern<'a> = Vec<chrono::format::Item<'a>>;

/// Builds the error returned when a `to_char` template asks an interval for something it
/// cannot provide (the message text mirrors the wording asserted by the e2e tests).
#[inline(always)]
fn invalid_pattern_err() -> ExprError {
    const REASON: &str = "invalid format specification for an interval value, HINT: Intervals are not tied to specific calendar dates.";

    ExprError::InvalidParam {
        name: "pattern",
        reason: REASON.into(),
    }
}

self_cell::self_cell! {
pub struct ChronoPattern {
owner: String,
Expand Down Expand Up @@ -97,10 +107,73 @@ impl ChronoPattern {
.expect("failed to build an Aho-Corasick automaton")
});

ChronoPattern::compile_inner(tmpl, PATTERNS, &AC)
}

/// Compiles a PostgreSQL-style `to_char` template into a chrono pattern for interval values.
///
/// Keywords are matched case-sensitively, leftmost-longest, and replaced by the chrono
/// specifier paired with them in the table below; all other text is passed through to
/// `compile_inner` unchanged.
///
/// pg pattern: https://www.postgresql.org/docs/current/functions-formatting.html
/// chrono pattern: https://docs.rs/chrono/latest/chrono/format/strftime/index.html
pub fn compile_for_interval(tmpl: &str) -> ChronoPattern {
    // (pg keyword, chrono specifier) pairs. The automaton below is built from this same
    // table, so a match's pattern index is also the index of its replacement.
    const PG_TO_CHRONO: &[(&str, &str)] = &[
        // hours
        ("HH24", "%H"),
        ("hh24", "%H"),
        ("HH12", "%I"),
        ("hh12", "%I"),
        ("HH", "%I"),
        ("hh", "%I"),
        // meridiem indicator
        ("AM", "%p"),
        ("PM", "%p"),
        ("am", "%P"),
        ("pm", "%P"),
        // minutes and seconds
        ("MI", "%M"),
        ("mi", "%M"),
        ("SS", "%S"),
        ("ss", "%S"),
        // calendar and ISO years
        ("YYYY", "%Y"),
        ("yyyy", "%Y"),
        ("YY", "%y"),
        ("yy", "%y"),
        ("IYYY", "%G"),
        ("iyyy", "%G"),
        ("IY", "%g"),
        ("iy", "%g"),
        // months and days
        ("MM", "%m"),
        ("mm", "%m"),
        ("Month", "%B"),
        ("Mon", "%b"),
        ("DD", "%d"),
        ("dd", "%d"),
        // fractional seconds: "%6f" and "%3f" are converted to private data structures in
        // chrono, so we use "%.6f" and "%.3f" instead.
        ("US", "%.6f"),
        ("us", "%.6f"),
        ("MS", "%.3f"),
        ("ms", "%.3f"),
        // time-zone offsets
        ("TZH:TZM", "%:z"),
        ("tzh:tzm", "%:z"),
        ("TZHTZM", "%z"),
        ("tzhtzm", "%z"),
        ("TZH", "%#z"),
        ("tzh", "%#z"),
    ];

    // Aho-Corasick automaton over the pg keywords, built once on first use.
    static MATCHER: LazyLock<AhoCorasick> = LazyLock::new(|| {
        let keywords = PG_TO_CHRONO.iter().map(|(pg, _)| pg);
        AhoCorasickBuilder::new()
            .ascii_case_insensitive(false)
            .match_kind(aho_corasick::MatchKind::LeftmostLongest)
            .build(keywords)
            .expect("failed to build an Aho-Corasick automaton")
    });

    ChronoPattern::compile_inner(tmpl, PG_TO_CHRONO, &MATCHER)
}

fn compile_inner(
tmpl: &str,
patterns: &[(&str, &str)],
ac: &LazyLock<AhoCorasick>,
) -> ChronoPattern {
// replace all pg patterns with chrono patterns
let mut chrono_tmpl = String::new();
AC.replace_all_with(tmpl, &mut chrono_tmpl, |mat, _, dst| {
dst.push_str(PATTERNS[mat.pattern()].1);
ac.replace_all_with(tmpl, &mut chrono_tmpl, |mat, _, dst| {
dst.push_str(patterns[mat.pattern()].1);
true
});
tracing::debug!(tmpl, chrono_tmpl, "compile_pattern_to_chrono");
Expand Down Expand Up @@ -138,3 +211,192 @@ fn timestamptz_to_char3(
write!(writer, "{}", format).unwrap();
Ok(())
}

/// `to_char(interval, varchar)`: renders `interval` according to a pre-compiled pattern.
///
/// Every parsed item of the pattern is written to `writer` in order; the first item that
/// fails to format aborts the call and its error is returned.
#[function(
    "to_char(interval, varchar) -> varchar",
    prebuild = "ChronoPattern::compile_for_interval($1)"
)]
fn interval_to_char(
    interval: Interval,
    pattern: &ChronoPattern,
    writer: &mut impl Write,
) -> Result<()> {
    pattern
        .borrow_dependent()
        .iter()
        .try_for_each(|item| format_inner(writer, interval, item))
}

/// Computes the ISO week-numbering year reached by adding the month and day parts of
/// `interval` to midnight of 0000-01-01.
///
/// The sub-day (microsecond) part of the interval is deliberately zeroed before the
/// addition. Any error from `timestamp_interval_add` is propagated.
fn adjust_to_iso_year(interval: Interval) -> Result<i32> {
    // Reference point: year 0, January 1st, 00:00:00.
    let origin = NaiveDate::from_ymd_opt(0, 1, 1)
        .and_then(|day| day.and_hms_opt(0, 0, 0))
        .unwrap();
    let origin = risingwave_common::types::Timestamp(origin);

    // Only months and days take part in the shift.
    let calendar_part = Interval::from_month_day_usec(interval.months(), interval.days(), 0);
    let shifted = timestamp_interval_add(origin, calendar_part)?;

    Ok(shifted.0.iso_week().year())
}

/// Writes a single parsed chrono format item for `interval` into `w`.
///
/// Literal and whitespace items are copied verbatim. Numeric and fixed items are rendered
/// from the interval's fields; negative values are printed with an explicit sign, and the
/// zero-padding widths follow the expectations in `e2e_test/batch/functions/to_char.slt.part`.
///
/// # Errors
///
/// Returns the "invalid pattern" error for every specifier that has no meaning for an
/// interval (month names, time-zone offsets, weekdays, ordinals, ...). The template is parsed
/// by chrono's strftime parser after keyword substitution, so raw `%`-specifiers typed by the
/// user can reach this function; they must be reported as errors rather than panic.
/// Errors from `adjust_to_iso_year` are propagated for the ISO-year specifiers.
fn format_inner(w: &mut impl Write, interval: Interval, item: &Item<'_>) -> Result<()> {
    match *item {
        Item::Literal(s) | Item::Space(s) => {
            w.write_str(s).unwrap();
            Ok(())
        }
        Item::OwnedLiteral(ref s) | Item::OwnedSpace(ref s) => {
            w.write_str(s).unwrap();
            Ok(())
        }
        Item::Numeric(ref spec, _) => {
            use chrono::format::Numeric::*;
            match *spec {
                Year => {
                    let year = interval.years_field();
                    if year < 0 {
                        write!(w, "{:+05}", year).unwrap();
                    } else {
                        write!(w, "{:04}", year).unwrap();
                    }
                }
                YearMod100 => {
                    let year = interval.years_field();
                    if year % 100 < 0 {
                        let year = -((-year) % 100);
                        write!(w, "{:+03}", year).unwrap();
                    } else {
                        let year = year % 100;
                        write!(w, "{:02}", year).unwrap();
                    }
                }
                IsoYear => {
                    let iso_year = adjust_to_iso_year(interval)?;
                    // The sign/width choice is driven by the interval's year field, not by
                    // the computed ISO year.
                    if interval.years_field() < 0 {
                        write!(w, "{:+05}", iso_year).unwrap();
                    } else {
                        write!(w, "{:04}", iso_year).unwrap();
                    }
                }
                IsoYearMod100 => {
                    let iso_year = adjust_to_iso_year(interval)?;
                    if interval.years_field() % 100 < 0 {
                        let iso_year = -((-iso_year) % 100);
                        write!(w, "{:+03}", iso_year).unwrap();
                    } else {
                        let iso_year = iso_year % 100;
                        write!(w, "{:02}", iso_year).unwrap();
                    }
                }
                Month => {
                    let month = interval.months_field();
                    if month < 0 {
                        write!(w, "{:+03}", month).unwrap();
                    } else {
                        write!(w, "{:02}", month).unwrap();
                    }
                }
                Day => {
                    let day = interval.days_field();
                    // Unlike the other fields, a negative day keeps a total width of 2
                    // (e.g. "-1"), as the e2e expectations require.
                    if day < 0 {
                        write!(w, "{:+02}", day).unwrap();
                    } else {
                        write!(w, "{:02}", day).unwrap();
                    }
                }
                Hour => {
                    let hour = interval.hours_field();
                    if hour < 0 {
                        write!(w, "{:+03}", hour).unwrap();
                    } else {
                        write!(w, "{:02}", hour).unwrap();
                    }
                }
                Hour12 => {
                    let hour = interval.hours_field();
                    if hour < 0 {
                        // Rust's `%` keeps the sign of the dividend, so this is in -11..=0.
                        let hour = hour % 12;
                        // here to align with postgres, we format -0 as 012.
                        if hour == 0 {
                            w.write_str("012").unwrap();
                        } else {
                            write!(w, "{:+03}", hour).unwrap();
                        }
                    } else {
                        let hour = if hour % 12 == 0 { 12 } else { hour % 12 };
                        write!(w, "{:02}", hour).unwrap();
                    }
                }
                Minute => {
                    let minute = interval.usecs() / 1_000_000 / 60;
                    if minute % 60 < 0 {
                        let minute = -((-minute) % 60);
                        write!(w, "{:+03}", minute).unwrap();
                    } else {
                        let minute = minute % 60;
                        write!(w, "{:02}", minute).unwrap();
                    }
                }
                Second => {
                    let second = interval.usecs() / 1_000_000;
                    if second % 60 < 0 {
                        let second = -((-second) % 60);
                        write!(w, "{:+03}", second).unwrap();
                    } else {
                        let second = second % 60;
                        write!(w, "{:02}", second).unwrap();
                    }
                }
                // None of these are produced by the pg keyword table, but a raw chrono
                // specifier in the user's template (e.g. `%j`, `%C`, `%s`) parses to them.
                // Reject the pattern instead of panicking on user input.
                Nanosecond | Ordinal | WeekdayFromMon | NumDaysFromSun | IsoWeek | WeekFromSun
                | WeekFromMon | IsoYearDiv100 | Timestamp | YearDiv100 | Internal(_) => {
                    return Err(invalid_pattern_err());
                }
            }
            Ok(())
        }
        Item::Fixed(ref spec) => {
            use chrono::format::Fixed::*;
            match *spec {
                LowerAmPm => {
                    if interval.hours_field() % 24 >= 12 {
                        w.write_str("pm").unwrap();
                    } else {
                        w.write_str("am").unwrap();
                    }
                    Ok(())
                }
                UpperAmPm => {
                    if interval.hours_field() % 24 >= 12 {
                        w.write_str("PM").unwrap();
                    } else {
                        w.write_str("AM").unwrap();
                    }
                    Ok(())
                }
                Nanosecond3 => {
                    // Milliseconds within the current second (sign follows the interval).
                    let usec = interval.usecs() % 1_000_000;
                    write!(w, "{:03}", usec / 1000).unwrap();
                    Ok(())
                }
                Nanosecond6 => {
                    // Microseconds within the current second (sign follows the interval).
                    let usec = interval.usecs() % 1_000_000;
                    write!(w, "{:06}", usec).unwrap();
                    Ok(())
                }
                // Month names and time-zone offsets come from pg keywords; the remaining
                // variants can only come from raw chrono specifiers in the template
                // (e.g. `%a`, `%Z`, `%+`). Either way the pattern is invalid for an
                // interval, so report it instead of panicking.
                Internal(_)
                | ShortMonthName
                | LongMonthName
                | TimezoneOffset
                | TimezoneOffsetZ
                | TimezoneOffsetColon
                | ShortWeekdayName
                | LongWeekdayName
                | TimezoneName
                | TimezoneOffsetDoubleColon
                | TimezoneOffsetTripleColon
                | TimezoneOffsetColonZ
                | Nanosecond
                | Nanosecond9
                | RFC2822
                | RFC3339 => Err(invalid_pattern_err()),
            }
        }
        Item::Error => Err(invalid_pattern_err()),
    }
}
Loading

0 comments on commit c6410f1

Please sign in to comment.