From 94a64e4f89e085e62de0d95623ae005fa5cbb446 Mon Sep 17 00:00:00 2001 From: Mark Davis Date: Thu, 12 Sep 2024 08:47:56 -0700 Subject: [PATCH] CLDR-17892 ISO 8601 calendar (#4037) --- common/main/root.xml | 575 ++++++++++++++---- docs/site/downloads/cldr-46.md | 16 +- .../org/unicode/cldr/util/PathHeader.java | 1 + .../org/unicode/cldr/util/data/PathHeader.txt | 3 + .../unicode/cldr/util/data/Placeholders.txt | 3 +- .../unicode/cldr/unittest/TestCLDRFile.java | 1 + .../unicode/cldr/unittest/TestDateOrder.java | 323 +++++++++- .../TestDisplayAndInputProcessor.java | 28 +- 8 files changed, 808 insertions(+), 142 deletions(-) diff --git a/common/main/root.xml b/common/main/root.xml index d398b8c2dc2..5c87d314864 100644 --- a/common/main/root.xml +++ b/common/main/root.xml @@ -482,11 +482,11 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ {0} – {1} - h B – h B + h B – h B h–h B - h:mm B – h:mm B + h:mm B – h:mm B h:mm–h:mm B h:mm–h:mm B @@ -494,14 +494,14 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ d–d - h a – h a - h–h a + h a – h a + h–h a HH–HH - h:mm a – h:mm a + h:mm a – h:mm a h:mm–h:mm a h:mm–h:mm a @@ -510,7 +510,7 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ HH:mm–HH:mm - h:mm a – h:mm a v + h:mm a – h:mm a v h:mm–h:mm a v h:mm–h:mm a v @@ -519,8 +519,8 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ HH:mm–HH:mm v - h a – h a v - h–h a v + h a – h a v + h–h a v HH–HH v @@ -529,58 +529,58 @@ Warnings: All cp values have U+FE0F characters removed. 
See /annotationsDerived/ MM–MM - MM-dd – MM-dd - MM-dd – MM-dd + MM-dd – MM-dd + MM-dd – MM-dd - MM-dd, E – MM-dd, E - MM-dd, E – MM-dd, E + MM-dd, E – MM-dd, E + MM-dd, E – MM-dd, E LLL–LLL MMM d–d - MMM d – MMM d + MMM d – MMM d - MMM d, E – MMM d, E - MMM d, E – MMM d, E + MMM d, E – MMM d, E + MMM d, E – MMM d, E U–U - y-MM – y-MM - y-MM – y-MM + y-MM – y-MM + y-MM – y-MM - y-MM-dd – y-MM-dd - y-MM-dd – y-MM-dd - y-MM-dd – y-MM-dd + y-MM-dd – y-MM-dd + y-MM-dd – y-MM-dd + y-MM-dd – y-MM-dd - y-MM-dd, E – y-MM-dd, E - y-MM-dd, E – y-MM-dd, E - y-MM-dd, E – y-MM-dd, E + y-MM-dd, E – y-MM-dd, E + y-MM-dd, E – y-MM-dd, E + y-MM-dd, E – y-MM-dd, E U MMM–MMM - U MMM – U MMM + U MMM – U MMM U MMM d–d - U MMM d – MMM d - U MMM d – U MMM d + U MMM d – MMM d + U MMM d – U MMM d - U MMM d, E – MMM d, E - U MMM d, E – MMM d, E - U MMM d, E – U MMM d, E + U MMM d, E – MMM d, E + U MMM d, E – MMM d, E + U MMM d, E – U MMM d, E U MMMM–MMMM - U MMMM – U MMMM + U MMMM – U MMMM @@ -956,7 +956,7 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ G y MMM G y MMM d G y MMM d, E - h a + h a HH h:mm a HH:mm @@ -998,11 +998,11 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ {0} – {1} - h B – h B + h B – h B h–h B - h:mm B – h:mm B + h:mm B – h:mm B h:mm–h:mm B h:mm–h:mm B @@ -1010,52 +1010,52 @@ Warnings: All cp values have U+FE0F characters removed. 
See /annotationsDerived/ d–d - G y – G y + G y – G y G y–y - GGGGG y-MM – GGGGG y-MM - GGGGG y-MM – y-MM - GGGGG y-MM – y-MM + GGGGG y-MM – GGGGG y-MM + GGGGG y-MM – y-MM + GGGGG y-MM – y-MM - GGGGG y-MM-dd – y-MM-dd - GGGGG y-MM-dd – GGGGG y-MM-dd - GGGGG y-MM-dd – y-MM-dd - GGGGG y-MM-dd – y-MM-dd + GGGGG y-MM-dd – y-MM-dd + GGGGG y-MM-dd – GGGGG y-MM-dd + GGGGG y-MM-dd – y-MM-dd + GGGGG y-MM-dd – y-MM-dd - GGGGG y-MM-dd, E – y-MM-dd, E - GGGGG y-MM-dd, E – GGGGG y-MM-dd, E - GGGGG y-MM-dd, E – y-MM-dd, E - GGGGG y-MM-dd, E – y-MM-dd, E + GGGGG y-MM-dd, E – y-MM-dd, E + GGGGG y-MM-dd, E – GGGGG y-MM-dd, E + GGGGG y-MM-dd, E – y-MM-dd, E + GGGGG y-MM-dd, E – y-MM-dd, E - G y MMM – G y MMM + G y MMM – G y MMM G y MMM–MMM - G y MMM – y MMM + G y MMM – y MMM G y MMM d–d - G y MMM d – G y MMM d - G y MMM d – MMM d - G y MMM d – y MMM d + G y MMM d – G y MMM d + G y MMM d – MMM d + G y MMM d – y MMM d - G y MMM d, E – MMM d, E - G y MMM d, E – G y MMM d, E - G y MMM d, E – MMM d, E - G y MMM d, E – y MMM d, E + G y MMM d, E – MMM d, E + G y MMM d, E – G y MMM d, E + G y MMM d, E – MMM d, E + G y MMM d, E – y MMM d, E - h a – h a - h–h a + h a – h a + h–h a HH–HH - h:mm a – h:mm a + h:mm a – h:mm a h:mm–h:mm a h:mm–h:mm a @@ -1064,7 +1064,7 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ HH:mm–HH:mm - h:mm a – h:mm a v + h:mm a – h:mm a v h:mm–h:mm a v h:mm–h:mm a v @@ -1073,8 +1073,8 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ HH:mm–HH:mm v - h a – h a v - h–h a v + h a – h a v + h–h a v HH–HH v @@ -1083,58 +1083,58 @@ Warnings: All cp values have U+FE0F characters removed. 
See /annotationsDerived/ MM–MM - MM-dd – MM-dd - MM-dd – MM-dd + MM-dd – MM-dd + MM-dd – MM-dd - MM-dd, E – MM-dd, E - MM-dd, E – MM-dd, E + MM-dd, E – MM-dd, E + MM-dd, E – MM-dd, E LLL–LLL MMM d–d - MMM d – MMM d + MMM d – MMM d - MMM d, E – MMM d, E - MMM d, E – MMM d, E + MMM d, E – MMM d, E + MMM d, E – MMM d, E G y–y - GGGGG y-MM – y-MM - GGGGG y-MM – y-MM + GGGGG y-MM – y-MM + GGGGG y-MM – y-MM - GGGGG y-MM-dd – y-MM-dd - GGGGG y-MM-dd – y-MM-dd - GGGGG y-MM-dd – y-MM-dd + GGGGG y-MM-dd – y-MM-dd + GGGGG y-MM-dd – y-MM-dd + GGGGG y-MM-dd – y-MM-dd - GGGGG y-MM-dd, E – y-MM-dd, E - GGGGG y-MM-dd, E – y-MM-dd, E - GGGGG y-MM-dd, E – y-MM-dd, E + GGGGG y-MM-dd, E – y-MM-dd, E + GGGGG y-MM-dd, E – y-MM-dd, E + GGGGG y-MM-dd, E – y-MM-dd, E G y MMM–MMM - G y MMM – y MMM + G y MMM – y MMM G y MMM d–d - G y MMM d – MMM d - G y MMM d – y MMM d + G y MMM d – MMM d + G y MMM d – y MMM d - G y MMM d, E – MMM d, E - G y MMM d, E – MMM d, E - G y MMM d, E – y MMM d, E + G y MMM d, E – MMM d, E + G y MMM d, E – MMM d, E + G y MMM d, E – y MMM d, E G y MMMM–MMMM - G y MMMM – y MMMM + G y MMMM – y MMMM @@ -1398,7 +1398,7 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ G y MMM G y MMM d G y MMM d, E - h a + h a HH h:mm a HH:mm @@ -1445,11 +1445,11 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ {0} – {1} - h B – h B + h B – h B h–h B - h:mm B – h:mm B + h:mm B – h:mm B h:mm–h:mm B h:mm–h:mm B @@ -1457,52 +1457,52 @@ Warnings: All cp values have U+FE0F characters removed. 
See /annotationsDerived/ d–d - G y – G y + G y – G y G y–y - GGGGG y-MM – GGGGG y-MM - GGGGG y-MM – y-MM - GGGGG y-MM – y-MM + GGGGG y-MM – GGGGG y-MM + GGGGG y-MM – y-MM + GGGGG y-MM – y-MM - GGGGG y-MM-dd – y-MM-dd - GGGGG y-MM-dd – GGGGG y-MM-dd - GGGGG y-MM-dd – y-MM-dd - GGGGG y-MM-dd – y-MM-dd + GGGGG y-MM-dd – y-MM-dd + GGGGG y-MM-dd – GGGGG y-MM-dd + GGGGG y-MM-dd – y-MM-dd + GGGGG y-MM-dd – y-MM-dd - GGGGG y-MM-dd, E – y-MM-dd, E - GGGGG y-MM-dd, E – GGGGG y-MM-dd, E - GGGGG y-MM-dd, E – y-MM-dd, E - GGGGG y-MM-dd, E – y-MM-dd, E + GGGGG y-MM-dd, E – y-MM-dd, E + GGGGG y-MM-dd, E – GGGGG y-MM-dd, E + GGGGG y-MM-dd, E – y-MM-dd, E + GGGGG y-MM-dd, E – y-MM-dd, E - G y MMM – G y MMM + G y MMM – G y MMM G y MMM–MMM - G y MMM – y MMM + G y MMM – y MMM G y MMM d–d - G y MMM d – G y MMM d - G y MMM d – MMM d - G y MMM d – y MMM d + G y MMM d – G y MMM d + G y MMM d – MMM d + G y MMM d – y MMM d - G y MMM d, E – MMM d, E - G y MMM d, E – G y MMM d, E - G y MMM d, E – MMM d, E - G y MMM d, E – y MMM d, E + G y MMM d, E – MMM d, E + G y MMM d, E – G y MMM d, E + G y MMM d, E – MMM d, E + G y MMM d, E – y MMM d, E - h a – h a - h–h a + h a – h a + h–h a HH–HH - h:mm a – h:mm a + h:mm a – h:mm a h:mm–h:mm a h:mm–h:mm a @@ -1511,7 +1511,7 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ HH:mm–HH:mm - h:mm a – h:mm a v + h:mm a – h:mm a v h:mm–h:mm a v h:mm–h:mm a v @@ -1520,8 +1520,8 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ HH:mm–HH:mm v - h a – h a v - h–h a v + h a – h a v + h–h a v HH–HH v @@ -1530,58 +1530,58 @@ Warnings: All cp values have U+FE0F characters removed. 
See /annotationsDerived/ MM–MM - MM-dd – MM-dd - MM-dd – MM-dd + MM-dd – MM-dd + MM-dd – MM-dd - MM-dd, E – MM-dd, E - MM-dd, E – MM-dd, E + MM-dd, E – MM-dd, E + MM-dd, E – MM-dd, E LLL–LLL MMM d–d - MMM d – MMM d + MMM d – MMM d - MMM d, E – MMM d, E - MMM d, E – MMM d, E + MMM d, E – MMM d, E + MMM d, E – MMM d, E y–y - y-MM – y-MM - y-MM – y-MM + y-MM – y-MM + y-MM – y-MM - y-MM-dd – y-MM-dd - y-MM-dd – y-MM-dd - y-MM-dd – y-MM-dd + y-MM-dd – y-MM-dd + y-MM-dd – y-MM-dd + y-MM-dd – y-MM-dd - y-MM-dd, E – y-MM-dd, E - y-MM-dd, E – y-MM-dd, E - y-MM-dd, E – y-MM-dd, E + y-MM-dd, E – y-MM-dd, E + y-MM-dd, E – y-MM-dd, E + y-MM-dd, E – y-MM-dd, E y MMM–MMM - y MMM – y MMM + y MMM – y MMM y MMM d–d - y MMM d – MMM d - y MMM d – y MMM d + y MMM d – MMM d + y MMM d – y MMM d - y MMM d, E – MMM d, E - y MMM d, E – MMM d, E - y MMM d, E – y MMM d, E + y MMM d, E – MMM d, E + y MMM d, E – MMM d, E + y MMM d, E – y MMM d, E y MMMM–MMMM - y MMMM – y MMMM + y MMMM – y MMMM @@ -1936,6 +1936,313 @@ Warnings: All cp values have U+FE0F characters removed. 
See /annotationsDerived/ + + + + + + + + + + + + + + + + + + + + y MMMM d, EEEE + yMMMMEEEEd + + + + + y MMMM d + yMMMMd + + + + + y MMM d + yMMMd + + + + + y-MM-dd + yMMdd + + + + + + + HH:mm:ss zzzz + HHmmsszzzz + + + + + HH:mm:ss z + HHmmssz + + + + + HH:mm:ss + HHmmss + + + + + HH:mm + HHmm + + + + + + + {1} {0} + + + + + + + + {1} {0} + + + + + + + + {1} {0} + + + + + + + + {1} {0} + + + + + + + h B + h:mm B + h:mm:ss B + d + ccc + E h:mm B + E h:mm:ss B + d, E + E h:mm a + E HH:mm + E h:mm:ss a + E HH:mm:ss + G y + G y-MM-dd + G y MMM + G y MMM d + G y MMM d, E + h a + HH + h:mm a + HH:mm + h:mm:ss a + HH:mm:ss + h:mm:ss a v + HH:mm:ss v + h:mm a v + HH:mm v + L + MM-dd + MM-dd, E + LLL + MMM d + MMM d + MMMM d + MMMM 'week' W + mm:ss + y + y-MM + y-MM-dd + y-MM-dd, E + y MMM + y MMM d + y MMM d, E + y MMMM + y QQQ + y QQQQ + Y 'week' w + + + {0} ({2}: {1}) + {0} {1} + {1} {0} + {0} ({2}: {1}) + {0} ({2}: {1}) + {0} ({2}: {1}) + {0} ({2}: {1}) + {0} ({2}: {1}) + {0} {1} + {0} ({2}: {1}) + {1} {0} + + + {0} – {1} + + h B – h B + h–h B + + + h:mm B – h:mm B + h:mm–h:mm B + h:mm–h:mm B + + + d–d + + + G y – G y + y–y + + + G y-MM – G y-MM + G y-MM – y-MM + G y-MM – y-MM + + + G y-MM-dd – y-MM-dd + G y-MM-dd – G y-MM-dd + G y-MM-dd – y-MM-dd + G y-MM-dd – y-MM-dd + + + G y-MM-dd, E – y-MM-dd, E + G y-MM-dd, E – G y-MM-dd, E + G y-MM-dd, E – y-MM-dd, E + G y-MM-dd, E – y-MM-dd, E + + + G y MMM – G y MMM + G y MMM–MMM + G y MMM – y MMM + + + G y MMM d–d + G y MMM d – G y MMM d + G y MMM d – MMM d + G y MMM d – y MMM d + + + G y MMM d, E – MMM d, E + G y MMM d, E – G y MMM d, E + G y MMM d, E – MMM d, E + G y MMM d, E – y MMM d, E + + + h a – h a + h–h a + + + HH–HH + + + h:mm a – h:mm a + h:mm–h:mm a + h:mm–h:mm a + + + HH:mm–HH:mm + HH:mm–HH:mm + + + h:mm a – h:mm a v + h:mm–h:mm a v + h:mm–h:mm a v + + + HH:mm–HH:mm v + HH:mm–HH:mm v + + + h a – h a v + h–h a v + + + HH–HH v + + + MM–MM + + + MM-dd – MM-dd + MM-dd – MM-dd + + + MM-dd, E – MM-dd, E + MM-dd, E – 
MM-dd, E + + + LLL–LLL + + + MMM d–d + MMM d – MMM d + + + MMM d, E – MMM d, E + MMM d, E – MMM d, E + + + y–y + + + y-MM – y-MM + y-MM – y-MM + + + y-MM-dd – y-MM-dd + y-MM-dd – y-MM-dd + y-MM-dd – y-MM-dd + + + y-MM-dd, E – y-MM-dd, E + y-MM-dd, E – y-MM-dd, E + y-MM-dd, E – y-MM-dd, E + + + y MMM–MMM + y MMM – y MMM + + + y MMM d–d + y MMM d – MMM d + y MMM d – y MMM d + + + y MMM d, E – MMM d, E + y MMM d, E – MMM d, E + y MMM d, E – y MMM d, E + + + y MMMM–MMMM + y MMMM – y MMMM + + + + diff --git a/docs/site/downloads/cldr-46.md b/docs/site/downloads/cldr-46.md index eb6f9f2ae57..97f8702ecdd 100644 --- a/docs/site/downloads/cldr-46.md +++ b/docs/site/downloads/cldr-46.md @@ -66,9 +66,13 @@ For a full listing, see [Delta DTDs](https://unicode.org/cldr/charts/46/suppleme 1. Currency 1. New currency code `ZWG` added — because it was late in the cycle, many locales will just support the code (no symbol or name). -2. Timezones and Metazones - 1. Changed the metazone for Kazakhstan to reflect removal of Asia/Almaty, thus dropping the distinction among different regions in Kazakhstan. - 2. Added support for deprecated codes by remapping: `CST6CDT → America/Chicago`, `EST → America/Panama`, `EST5EDT → America/New_York`, `MST7MDT → America/Denver`, `PST8PDT → America/Los_Angeles`. +2. Dates & Times + 1. Added a new calendar type, `iso8601`. +This is not the same as the [ISO 8601](https://en.wikipedia.org/wiki/ISO_8601) standard format, which is designed just for data interchange: +it is all ASCII, doesn't have all the options for fields (like "Sunday", "BC", or "AM"), and does not contain spaces. +The CLDR `iso8601` calendar uses patterns in the order: era, year, month, day, day-of-week, hour, minute, second, day-period, timezone + 2. Changed the metazone for Kazakhstan to reflect removal of Asia/Almaty, thus dropping the distinction among different regions in Kazakhstan. + 3. 
Added support for deprecated timezone codes by remapping: `CST6CDT → America/Chicago`, `EST → America/Panama`, `EST5EDT → America/New_York`, `MST7MDT → America/Denver`, `PST8PDT → America/Los_Angeles`. 3. Units 1. Added units: `portion-per-1e9` (aka per-billion), `night` (for hotel stays), `light-speed` (as an internal prefix for **light-second**, **light-minute**, etc.) 2. Changed preferred wind speed preference for some locales to `meter-per-second`. @@ -112,7 +116,11 @@ For a full listing, see [¤¤BCP47 Delta](https://unicode.org/cldr/charts/46/del 1. Major changes to emoji search keywords and short names (see below) 2. Major changes to Chinese collation, reflecting new data in Unicode 16.0 -3. Other changes +3. Added iso8601 patterns to root. +These will use localized months, days of the week, day periods, and timezones. +In this first version, the separators are not localized, and will use "-" within numeric dates, ":" within times, and " " or ", " between major elements. +Full localization will await the next submission phase for CLDR. +4. Other changes 1. Various locales also had smaller improvements agreed to by translators. 2. Additional test files have been added. 
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/PathHeader.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/PathHeader.java index 0bb98b04850..fef2148fcb6 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/PathHeader.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/PathHeader.java @@ -189,6 +189,7 @@ public enum PageId { Fields(SectionId.DateTime), Relative(SectionId.DateTime), Gregorian(SectionId.DateTime), + ISO8601(SectionId.DateTime, "ISO 8601"), Generic(SectionId.DateTime), Buddhist(SectionId.DateTime), Chinese(SectionId.DateTime), diff --git a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/PathHeader.txt b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/PathHeader.txt index 350f5cbc190..30d3c5b2260 100644 --- a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/PathHeader.txt +++ b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/PathHeader.txt @@ -119,11 +119,14 @@ //ldml/dates/calendars/calendar[@type="%A"]/dateTimeFormats/availableFormats/dateFormatItem[@id="%A"][@alt="ascii"] ; Special ; Suppress ; &calendar($1); &calField(Formats:Flexible:date)-$2-ascii ; HIDE //ldml/dates/calendars/calendar[@type="gregorian"]/dateTimeFormats/availableFormats/dateFormatItem[@id="(E*(a|b|B)*[h]%a)"] ; DateTime ; &calendar(gregorian); &calField(Formats:Flexible:time12) ; $1 ; LTR_ALWAYS //ldml/dates/calendars/calendar[@type="gregorian"]/dateTimeFormats/availableFormats/dateFormatItem[@id="(E*[Hms]%a)"] ; DateTime ; &calendar(gregorian); &calField(Formats:Flexible:time24) ; $1 ; LTR_ALWAYS +//ldml/dates/calendars/calendar[@type="iso8601"]/dateTimeFormats/availableFormats/dateFormatItem[@id="(E*(a|b|B)*[h]%a)"] ; DateTime ; &calendar(iso8601); &calField(Formats:Flexible:time12) ; $1 ; LTR_ALWAYS +//ldml/dates/calendars/calendar[@type="iso8601"]/dateTimeFormats/availableFormats/dateFormatItem[@id="(E*[Hms]%a)"] ; DateTime ; &calendar(iso8601); 
&calField(Formats:Flexible:time24) ; $1 ; LTR_ALWAYS //ldml/dates/calendars/calendar[@type="generic"]/dateTimeFormats/availableFormats/dateFormatItem[@id="(E*(a|b|B)*[h]%a)"] ; DateTime ; &calendar(generic); &calField(Formats:Flexible:time12) ; $1 ; LTR_ALWAYS //ldml/dates/calendars/calendar[@type="generic"]/dateTimeFormats/availableFormats/dateFormatItem[@id="(E*[Hms]%a)"] ; DateTime ; &calendar(generic); &calField(Formats:Flexible:time24) ; $1 ; LTR_ALWAYS //ldml/dates/calendars/calendar[@type="chinese"]/dateTimeFormats/availableFormats/dateFormatItem[@id="(E*(a|b|B)*[h]%a)"] ; DateTime ; &calendar(chinese); &calField(Formats:Flexible:time12) ; $1 ; LTR_ALWAYS //ldml/dates/calendars/calendar[@type="chinese"]/dateTimeFormats/availableFormats/dateFormatItem[@id="(E*[Hms]%a)"] ; DateTime ; &calendar(chinese); &calField(Formats:Flexible:time24) ; $1 ; LTR_ALWAYS //ldml/dates/calendars/calendar[@type="gregorian"]/dateTimeFormats/availableFormats/dateFormatItem[@id="%A"][@count="%A"] ; DateTime ; &calendar(gregorian); &calField(Formats:Flexible:date) ; $1-$2 ; LTR_ALWAYS +//ldml/dates/calendars/calendar[@type="iso8601"]/dateTimeFormats/availableFormats/dateFormatItem[@id="%A"][@count="%A"] ; DateTime ; &calendar(iso8601); &calField(Formats:Flexible:date) ; $1-$2 ; LTR_ALWAYS //ldml/dates/calendars/calendar[@type="%N"]/dateTimeFormats/availableFormats/dateFormatItem[@id="(?![^"]*[hHms])%A"] ; DateTime ; &calendar($1); &calField(Formats:Flexible:date) ; $2 ; LTR_ALWAYS //ldml/dates/calendars/calendar[@type="%A"]/dateTimeFormats/availableFormats/dateFormatItem[@id="%A"][@count="%A"] ; Special ; Suppress ; &calendar($1); &calField(Formats:Flexible:date)-$2-$3 ; HIDE //ldml/dates/calendars/calendar[@type="%A"]/dateTimeFormats/availableFormats/dateFormatItem[@id="%A"] ; Special ; Suppress ; &calendar($1); &calField(Formats:Flexible:date)-$2 ; HIDE diff --git a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/Placeholders.txt 
b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/Placeholders.txt index f8252955650..89464cf8327 100644 --- a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/Placeholders.txt +++ b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/Placeholders.txt @@ -11,7 +11,8 @@ ^//ldml/dates/calendars/calendar\[@type="%A"]/dateTimeFormats/dateTimeFormatLength\[@type="%A"]/dateTimeFormat\[@type="%A"]/pattern\[@type="%A"] ; {1}=DATE Dec 10, 2010 ; {0}=TIME 3:59 -# strange values +# The code does not currently allow for showing the placeholders in datetime formats + #^//ldml/dates/.*(pattern|available|intervalFormatItem) ; cccc=UNKNOWN ? #^//ldml/dates/.*(pattern|available|intervalFormatItem) ; ccc=UNKNOWN ? # diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCLDRFile.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCLDRFile.java index 202152c6a20..520fc88c9d7 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCLDRFile.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCLDRFile.java @@ -192,6 +192,7 @@ public void testExtraPaths() { || path.contains("/gender") || path.startsWith("//ldml/numbers/currencies/currency") || path.startsWith("//ldml/personNames/sampleName") + || path.contains("/availableFormats") ? PlaceholderStatus.DISALLOWED : path.contains("/compoundUnitPattern1") ? 
PlaceholderStatus.REQUIRED diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDateOrder.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDateOrder.java index e6650b0b38e..85dd1068364 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDateOrder.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDateOrder.java @@ -1,16 +1,39 @@ package org.unicode.cldr.unittest; +import com.google.common.base.Joiner; +import com.google.common.collect.HashMultimap; +import com.google.common.collect.Multimap; import com.ibm.icu.dev.test.TestFmwk; import com.ibm.icu.text.DateTimePatternGenerator; +import com.ibm.icu.text.DateTimePatternGenerator.VariableField; +import com.ibm.icu.text.SimpleDateFormat; +import com.ibm.icu.text.UnicodeSet; +import com.ibm.icu.util.TimeZone; +import java.time.Instant; +import java.util.ArrayList; import java.util.Collection; +import java.util.Date; import java.util.HashSet; +import java.util.List; +import java.util.Locale; import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; +import java.util.TreeSet; import org.unicode.cldr.test.DateOrder; +import org.unicode.cldr.util.CLDRConfig; import org.unicode.cldr.util.CLDRFile; +import org.unicode.cldr.util.CLDRLocale; +import org.unicode.cldr.util.ICUServiceBuilder; +import org.unicode.cldr.util.PathHeader; +import org.unicode.cldr.util.PathHeader.Factory; import org.unicode.cldr.util.SimpleXMLSource; import org.unicode.cldr.util.XMLSource; +import org.unicode.cldr.util.XPathParts; public class TestDateOrder extends TestFmwk { + private static final Joiner JOIN_TAB = Joiner.on('\t'); + public static void main(String[] args) { new TestDateOrder().run(args); } @@ -51,7 +74,7 @@ public void TestDateImportance() { source.putValueAtPath(fullDate, "EEEE, y MMMM dd"); order = DateOrder.getOrderingInfo(cldrFile, cldrFile, fp); - values = new HashSet(order.get(fullDate).values()); // filter + values = new 
HashSet<>(order.get(fullDate).values()); // filter // duplicates assertEquals("There should be a conflict with other date values", 1, values.size()); assertTrue("No conflict with long date", values.contains(longDate)); @@ -68,4 +91,302 @@ public void TestDateImportance() { assertTrue("Available format conflict not found", values.contains(availableFormat)); assertTrue("Date format conflict not found", values.contains(fullDate)); } + + static final String stockDatePathPrefix = + "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateFormats/dateFormatLength"; + static final String stockTimePathPrefix = + "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/timeFormats/timeFormatLength"; + static final String availableFormatPathPrefix = + "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/availableFormats/dateFormatItem"; + static final String intervalFormatPathPrefix = + "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dateTimeFormats/intervalFormats/"; + + public void TestIso8601() { + List printout = null; + if (isVerbose()) { + printout = new ArrayList<>(); + } else { + warnln("Use -v to see a comparison between calendars"); + } + + ICUServiceBuilder isb = ICUServiceBuilder.forLocale(CLDRLocale.getInstance("en")); + ICUServiceBuilder isbCan = ICUServiceBuilder.forLocale(CLDRLocale.getInstance("en_CA")); + CLDRFile english = CLDRConfig.getInstance().getEnglish(); + CLDRFile englishCan = CLDRConfig.getInstance().getCldrFactory().make("en_CA", true); + Factory phf = PathHeader.getFactory(); + + Set paths = new TreeSet<>(); + for (String path : english) { + if (!path.startsWith("//ldml/dates/calendars/calendar[@type=\"gregorian\"]")) { + continue; + } else if (path.startsWith(stockTimePathPrefix) + || path.startsWith(stockDatePathPrefix)) { + if (!path.contains("datetimeSkeleton")) { + paths.add(phf.fromPath(path)); + } + } else if (path.startsWith(availableFormatPathPrefix)) { + paths.add(phf.fromPath(path)); + } else if 
(path.startsWith(intervalFormatPathPrefix)) { + if (!path.contains("intervalFormatFallback")) { + paths.add(phf.fromPath(path)); + } + } else { + int debug = 0; + } + } + Date sample = Date.from(Instant.parse("2024-01-13T19:08:09Z")); + SimpleDateFormat neutralFormat = + new SimpleDateFormat("G yyyy-MM-dd HH:mm:ss X", Locale.ROOT); + neutralFormat.setTimeZone(TimeZone.GMT_ZONE); + + for (PathHeader pathHeader : paths) { + final String originalPath = pathHeader.getOriginalPath(); + String code = pathHeader.getCode(); + + if (originalPath.startsWith(stockTimePathPrefix)) { + code = "time-" + code; + } else if (originalPath.startsWith(stockDatePathPrefix)) { + code = "date-" + code; + } + String gregPat = english.getStringValue(originalPath); + String isoPat = + english.getStringValue(originalPath.replace("\"gregorian\"", "\"iso8601\"")); + String canPat = englishCan.getStringValue(originalPath); + + String gregFormatted = null; + String isoFormatted = null; + String canFormatted = null; + + String sampleDate = null; + + if (originalPath.contains("intervalFormats")) { + Date sample1 = (Date) sample.clone(); + Date sample2 = (Date) sample.clone(); + XPathParts parts = XPathParts.getFrozenInstance(originalPath); + String greatestDifference = parts.getAttributeValue(-1, "id"); + + switch (greatestDifference) { + case "G": + sample1.setYear(sample.getYear() - 3000); + break; + case "y": + sample2.setYear(sample.getYear() + 1); + break; + case "M": + sample2.setMonth(sample.getMonth() + 1); + break; + case "d": + sample2.setDate(sample.getDate() + 1); + break; + case "h": + case "H": + sample2.setHours(sample.getHours() + 1); + break; + case "a": + case "B": + sample1.setHours(sample.getHours() - 12); + break; + case "m": + sample2.setMinutes(sample.getMinutes() + 1); + break; + case "s": + sample2.setSeconds(sample.getSeconds() + 1); + break; + default: + System.out.println("Missing" + greatestDifference); + break; + } + sampleDate = neutralFormat.format(sample1) + " - " 
+ neutralFormat.format(sample2); + + check(isoPat, Set.of(Check.dayperiod)); + + List parts2 = splitIntervalPattern(isoPat); + check( + parts2.get(0), + Set.of(Check.order, Check.uniqueness)); // check first part of interval + check( + parts2.get(2), + Set.of(Check.order, Check.uniqueness)); // check second part of interval + + gregFormatted = formatInterval(isb, sample1, sample2, "gregorian", gregPat); + isoFormatted = formatInterval(isb, sample1, sample2, "iso8601", isoPat); + canFormatted = formatInterval(isbCan, sample1, sample2, "gregorian", canPat); + } else { + check(isoPat, Set.of(Check.order, Check.uniqueness, Check.dayperiod)); + + sampleDate = neutralFormat.format(sample); + + SimpleDateFormat gregFormat = isb.getDateFormat("gregorian", gregPat); + gregFormat.setTimeZone(TimeZone.GMT_ZONE); + SimpleDateFormat isoFormat = isb.getDateFormat("iso8601", isoPat); + isoFormat.setTimeZone(TimeZone.GMT_ZONE); + SimpleDateFormat caFormat = isbCan.getDateFormat("gregorian", canPat); + caFormat.setTimeZone(TimeZone.GMT_ZONE); + + gregFormatted = gregFormat.format(sample); + isoFormatted = isoFormat.format(sample); + canFormatted = caFormat.format(sample); + } + if (printout != null) { + canFormatted = canFormatted.replace("a.m.", "AM").replace("p.m.", "PM"); + printout.add( + JOIN_TAB.join( + code, + gregPat, + isoPat, + canPat, + sampleDate, + gregFormatted, + isoFormatted, + canFormatted)); + } + } + if (printout != null) { + System.out.println(); + for (String line : printout) { + System.out.println(line); + } + } + } + + static final List expectedOrder = + List.of( + DateTimePatternGenerator.ERA, + DateTimePatternGenerator.YEAR, + DateTimePatternGenerator.QUARTER, + DateTimePatternGenerator.MONTH, + DateTimePatternGenerator.DAY, + DateTimePatternGenerator.WEEK_OF_YEAR, + DateTimePatternGenerator.WEEK_OF_MONTH, + DateTimePatternGenerator.WEEKDAY, + DateTimePatternGenerator.HOUR, + DateTimePatternGenerator.MINUTE, + DateTimePatternGenerator.SECOND, +
DateTimePatternGenerator.DAYPERIOD, + DateTimePatternGenerator.ZONE); + + enum Check { + order, + dayperiod, + uniqueness + } + + private void check(String isoPat, Set checks) { + VariableField last = null; + int lastType = -1; + Multimap types = HashMultimap.create(); + + // check the order. y M is ok, because type(y) < type(M) + + for (Object p : parser.set(isoPat).getItems()) { + if (p instanceof VariableField) { + VariableField pv = (VariableField) p; + final int rawType = pv.getType(); + int curType = expectedOrder.indexOf(rawType); + if (!assertTrue(pv + ": order > 0", curType >= 0)) { + int debug = 0; + } + if (checks.contains(Check.order) && lastType != -1) { + assertTrue(isoPat + ": " + last + " < " + pv, lastType < curType); + } + last = pv; + lastType = curType; + types.put(rawType, pv.toString()); + } + } + + // There is only one field of each type + + if (checks.contains(Check.uniqueness)) { + for (Entry> entry : types.asMap().entrySet()) { + assertEquals(entry.toString(), 1, entry.getValue().size()); + } + } + + // There is an a/B iff it is 12 hour + if (checks.contains(Check.dayperiod)) { + boolean hasDayPeriod = types.containsKey(DateTimePatternGenerator.DAYPERIOD); + Collection hours = types.get(DateTimePatternGenerator.HOUR); + char firstChar = + hours == null || hours.isEmpty() ? 
'\u0000' : hours.iterator().next().charAt(0); + boolean is12hour = firstChar == 'h' || firstChar == 'k'; + if (!assertEquals(isoPat + " has 'a' iff 12 hour", hasDayPeriod, is12hour)) { + int debug = 0; + } + } + } + + public String formatInterval( + ICUServiceBuilder isb, Date sample, Date sample2, String calendar, String pattern) { + List parts = splitIntervalPattern(pattern); + SimpleDateFormat gregFormat1 = isb.getDateFormat(calendar, parts.get(0)); + gregFormat1.setTimeZone(TimeZone.GMT_ZONE); + + SimpleDateFormat gregFormat2 = isb.getDateFormat(calendar, parts.get(2)); + gregFormat2.setTimeZone(TimeZone.GMT_ZONE); + + return gregFormat1.format(sample) + parts.get(1) + gregFormat2.format(sample2); + } + + DateTimePatternGenerator.FormatParser parser = new DateTimePatternGenerator.FormatParser(); + + private List splitIntervalPattern(String intervalPattern) { + List result = new ArrayList<>(); + StringBuilder current = new StringBuilder(); + Set soFar = new HashSet<>(); + + // we have something of the form (literal? field)* sepLiteral (field literal?)* + // that is, there are never 2 literals in a row. + // a literal is a sepLiteral if the field after it is already present (or rather, if its + // type is) + String lastString = null; + + for (Object p : parser.set(intervalPattern).getItems()) { + if (p instanceof String) { + lastString = (String) p; + } else if (p instanceof VariableField) { + VariableField pv = (VariableField) p; + if (soFar != null && soFar.contains(pv.getType())) { + // we hit the first repeated field + result.add(current.toString()); + current.setLength(0); + result.add( + lastString == null + ? "" + : lastString); // it would be strange to have "", but... 
+ lastString = null; + soFar = null; + } else { + if (soFar != null) { + soFar.add(pv.getType()); + } + if (lastString != null) { + current.append(quoteIfNeeded(lastString)); + lastString = null; + } + } + current.append(p); + } else { + throw new IllegalArgumentException(); + } + } + if (lastString != null) { + current.append(quoteIfNeeded(lastString)); + } + result.add(current.toString()); + if (result.size() != 3) { + throw new IllegalArgumentException(); + } + return result; + } + + static final UnicodeSet VARIABLE = new UnicodeSet("[a-zA-Z']").freeze(); + + private Object quoteIfNeeded(String lastString) { + if (VARIABLE.containsSome(lastString)) { + lastString = lastString.replace("'", "''"); + lastString = "'" + lastString + "'"; + } + return lastString; + } } diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDisplayAndInputProcessor.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDisplayAndInputProcessor.java index 82270a558e4..463049568ea 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDisplayAndInputProcessor.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDisplayAndInputProcessor.java @@ -1,6 +1,7 @@ package org.unicode.cldr.unittest; import com.ibm.icu.dev.test.TestFmwk; +import com.ibm.icu.impl.Utility; import com.ibm.icu.lang.CharSequences; import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.text.UnicodeSetIterator; @@ -351,12 +352,13 @@ private void showCldrFile(final CLDRFile cldrFile) { String input = daip.processInput(path, display, internalException); String diff = diff(value, input, path); if (diff != null) { + // repeat for debugging display = daip.processForDisplay(path, value); input = daip.processInput(path, display, internalException); diff(value, input, path); errln( cldrFile.getLocaleID() - + "\tNo roundtrip in DAIP:" + + "\tNo roundtrip in DAIP, value ≠ processInput(display(value)):" + "\n\t value<" + value + ">\n\tdisplay<" @@ -463,7 +465,29 @@ 
private String diff(String value, String input, String path) { if (value2.equals(input)) { return null; } - return "?"; + + return firstDiff(value2, input); + } + + private String firstDiff(String v1, String v2) { + int i = 0; + while (i < v1.length() && i < v2.length()) { + int cp1 = v1.codePointAt(i); + int cp2 = v2.codePointAt(i); + if (cp1 != cp2) { + return v1.substring(0, i) + + " ⓥ:" + + Utility.hex(cp1) + + " ≠ Ⓘ(Ⓓ(ⓥ)):" + + Utility.hex(cp2); + } + i += cp1 <= 0xFFFF ? 1 : 2; + } + return v1.substring(0, i) + + " ⓥ:" + + v1.substring(i, v1.length()) + + " ≠ Ⓘ(Ⓓ(ⓥ)):" + + v2.substring(i, v2.length()); } /** Test whether DisplayAndInputProcessor.processInput removes backspaces */