From 2dd26b09029507450d37c80347f7ceb45fdc9514 Mon Sep 17 00:00:00 2001 From: macchiati Date: Thu, 11 Jul 2024 21:02:55 -0700 Subject: [PATCH 1/4] CLDR-8823 Tool for grouping calendars by behavior --- .../unicode/cldr/tool/ShowCalendarGroups.java | 196 ++++++++++++++++++ 1 file changed, 196 insertions(+) create mode 100644 tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowCalendarGroups.java diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowCalendarGroups.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowCalendarGroups.java new file mode 100644 index 00000000000..680e2aeff3c --- /dev/null +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowCalendarGroups.java @@ -0,0 +1,196 @@ +package org.unicode.cldr.tool; + +import com.google.common.base.Joiner; +import com.google.common.collect.Comparators; +import com.google.common.collect.ComparisonChain; +import com.google.common.collect.ImmutableSet; +import com.google.common.collect.Multimap; +import com.google.common.collect.TreeMultimap; +import com.ibm.icu.impl.CalType; +import com.ibm.icu.util.BuddhistCalendar; +import com.ibm.icu.util.Calendar; +import com.ibm.icu.util.ChineseCalendar; +import com.ibm.icu.util.CopticCalendar; +import com.ibm.icu.util.DangiCalendar; +import com.ibm.icu.util.EthiopicCalendar; +import com.ibm.icu.util.GregorianCalendar; +import com.ibm.icu.util.HebrewCalendar; +import com.ibm.icu.util.IndianCalendar; +import com.ibm.icu.util.IslamicCalendar; +import com.ibm.icu.util.JapaneseCalendar; +import com.ibm.icu.util.PersianCalendar; +import com.ibm.icu.util.TaiwanCalendar; +import com.ibm.icu.util.TimeZone; +import com.ibm.icu.util.ULocale; +import java.util.Collection; +import java.util.Comparator; +import java.util.Date; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.Set; +import java.util.stream.Collectors; +import org.unicode.cldr.util.CldrUtility; + +/** Utility to compute calendar groups based 
on months & days per month */ +public class ShowCalendarGroups { + // Current output: [calendars]fingerprint + // where the fingerprint is: + // months, max_days_per_month, details + // where the details are {month1=[days1, days2, ..], ...}, where the monthX = [...] are + // suppressed for brevity when monthX = [max_days_per_month] + // + // [chinese, dangi] {12, 30, {1=[29, 30], 2=[29, 30], 3=[29, 30], 4=[29, 30], 5=[29, 30], + // 6=[29, 30], 7=[29, 30], 8=[29, 30], 9=[29, 30], 10=[29, 30], 11=[29, 30], 12=[29, 30]}} + // [islamic, islamic-civil, islamic-rgsa, islamic-tbla, islamic-umalqura] {12, 30, {2=[29], + // 4=[29], 6=[29], 8=[29], 10=[29], 12=[29, 30]}} + // [indian] {12, 31, {1=[30, 31], 7=[30], 8=[30], 9=[30], 10=[30], 11=[30], 12=[30]}} + // [gregorian, iso8601, buddhist, japanese, roc] {12, 31, {2=[28, 29], 4=[30], 6=[30], + // 9=[30], 11=[30]}} + // [persian] {12, 31, {7=[30], 8=[30], 9=[30], 10=[30], 11=[30], 12=[29, 30]}} + // [hebrew] {13, 30, {2=[29, 30], 3=[29, 30], 4=[29], 6=[29, 30], 7=[29], 9=[29], 11=[29], + // 13=[29]}} + // [coptic, ethiopic, ethiopic-amete-alem] {13, 30, {13=[5, 6]}} + + public static void main(String[] args) { + TreeMultimap footprintToCalendar = TreeMultimap.create(); + for (CalType calType : CalType.values()) { + Calendar cal2 = makeCalendar(calType, TimeZone.GMT_ZONE, ULocale.ENGLISH); + Footprint footPrint = new Footprint(cal2); + footprintToCalendar.put(footPrint, calType); + } + System.out.println(); + for (Entry> entry : footprintToCalendar.asMap().entrySet()) { + System.out.println( + entry.getValue().stream().map(x -> x.getId()).collect(Collectors.toList()) + + "\t" + + entry.getKey()); + } + } + + static class Footprint implements Comparable { + final int maxMonths; + final int maxDaysPerMonth; + final Multimap daysInMonth; + + static Date d = new Date(2000 - 1900, 0, 1, 0, 0, 0); + + public Footprint(Calendar cal2) { + maxMonths = cal2.getMaximum(Calendar.MONTH) + 1; + maxDaysPerMonth = 
cal2.getMaximum(Calendar.DAY_OF_MONTH); + final Multimap _daysInMonth = TreeMultimap.create(); + cal2.setTime(d); // year may not be gregorian + + final int startYear = cal2.get(Calendar.YEAR); + cal2.set(Calendar.DAY_OF_MONTH, 1); + for (int year = startYear; year < startYear + 100; ++year) { + cal2.set(Calendar.YEAR, year); + for (int month = 1; month <= maxMonths; ++month) { + cal2.set(Calendar.MONTH, month - 1); + _daysInMonth.put(month, cal2.getActualMaximum(Calendar.DAY_OF_MONTH)); + } + } + Set maxSingleton = ImmutableSet.of(maxDaysPerMonth); + // filter out cases where the month has only the max + final Multimap _daysInMonth2 = TreeMultimap.create(); + _daysInMonth.asMap().values().removeIf(value -> value.equals(maxSingleton)); + daysInMonth = CldrUtility.protectCollection(_daysInMonth); + } + + @Override + public boolean equals(Object obj) { + return compareTo((Footprint) obj) == 0; + } + + @Override + public int hashCode() { + return Objects.hash(maxMonths, maxDaysPerMonth); + } + + @Override + public int compareTo(Footprint o) { + return ComparisonChain.start() + .compare(maxMonths, o.maxMonths) + .compare(maxDaysPerMonth, o.maxDaysPerMonth) + .compare(daysInMonth.entries(), o.daysInMonth.entries(), LIST_ENTRY_COMP) + .result(); + } + + @Override + public String toString() { + return "{" + Joiner.on(", ").join(maxMonths, maxDaysPerMonth, daysInMonth) + "}"; + } + + static Comparator> ENTRY_COMP = + new Comparator<>() { + @Override + public int compare(Entry o1, Entry o2) { + return ComparisonChain.start() + .compare(o1.getKey(), o2.getKey()) + .compare(o1.getValue(), o2.getValue()) + .result(); + } + }; + static Comparator>> LIST_ENTRY_COMP = + Comparators.lexicographical(ENTRY_COMP); + } + + // This is not visible in ICU, so cloning here + static Calendar makeCalendar(CalType calType, TimeZone zone, ULocale locale) { + Calendar cal = null; + switch (calType) { + case ISO8601: + case GREGORIAN: + cal = new GregorianCalendar(zone, locale); + break; + case 
BUDDHIST: + cal = new BuddhistCalendar(zone, locale); + break; + case CHINESE: + cal = new ChineseCalendar(zone, locale); + break; + case COPTIC: + cal = new CopticCalendar(zone, locale); + break; + case DANGI: + cal = new DangiCalendar(zone, locale); + break; + case ETHIOPIC: + cal = new EthiopicCalendar(zone, locale); + break; + case ETHIOPIC_AMETE_ALEM: + cal = new EthiopicCalendar(zone, locale); + ((EthiopicCalendar) cal).setAmeteAlemEra(true); + break; + case HEBREW: + cal = new HebrewCalendar(zone, locale); + break; + case INDIAN: + cal = new IndianCalendar(zone, locale); + break; + case ISLAMIC_CIVIL: + case ISLAMIC_UMALQURA: + case ISLAMIC_TBLA: + case ISLAMIC_RGSA: + case ISLAMIC: + cal = new IslamicCalendar(zone, locale); + break; + case JAPANESE: + cal = new JapaneseCalendar(zone, locale); + break; + case PERSIAN: + cal = new PersianCalendar(zone, locale); + break; + case ROC: + cal = new TaiwanCalendar(zone, locale); + break; + + default: + // This copy does not map unknown types to Gregorian before the switch, + // so any CalType that is not handled by a case above ends up here. + throw new IllegalArgumentException("Unknown calendar type"); + } + + return cal; + } +} From 70b1e7ca12b499a33ee4f3568b33bd92f905bbe5 Mon Sep 17 00:00:00 2001 From: macchiati Date: Fri, 12 Jul 2024 21:58:27 -0700 Subject: [PATCH 2/4] CLDR-8823 Made some fixes due to oddities in different calendar systems. 
--- .../unicode/cldr/tool/ShowCalendarGroups.java | 156 ++++++++++++++---- 1 file changed, 122 insertions(+), 34 deletions(-) diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowCalendarGroups.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowCalendarGroups.java index 680e2aeff3c..be39b2af419 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowCalendarGroups.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowCalendarGroups.java @@ -1,5 +1,17 @@ package org.unicode.cldr.tool; +import java.util.Collection; +import java.util.Comparator; +import java.util.Date; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.Set; +import java.util.TreeSet; +import java.util.stream.Collectors; + +import org.unicode.cldr.util.CldrUtility; + import com.google.common.base.Joiner; import com.google.common.collect.Comparators; import com.google.common.collect.ComparisonChain; @@ -22,15 +34,6 @@ import com.ibm.icu.util.TaiwanCalendar; import com.ibm.icu.util.TimeZone; import com.ibm.icu.util.ULocale; -import java.util.Collection; -import java.util.Comparator; -import java.util.Date; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Objects; -import java.util.Set; -import java.util.stream.Collectors; -import org.unicode.cldr.util.CldrUtility; /** Utility to compute calendar groups based on months & days per month */ public class ShowCalendarGroups { @@ -55,9 +58,7 @@ public class ShowCalendarGroups { public static void main(String[] args) { TreeMultimap footprintToCalendar = TreeMultimap.create(); for (CalType calType : CalType.values()) { - Calendar cal2 = makeCalendar(calType, TimeZone.GMT_ZONE, ULocale.ENGLISH); - Footprint footPrint = new Footprint(cal2); - footprintToCalendar.put(footPrint, calType); + footprintToCalendar.put(new Footprint(calType), calType); } System.out.println(); for (Entry> entry : footprintToCalendar.asMap().entrySet()) { @@ -69,32 
+70,103 @@ public static void main(String[] args) { } static class Footprint implements Comparable { - final int maxMonths; + final int maxMonthsPerYear; + final Set monthsInYear; + final int maxDaysPerYear; + final Set daysInYear; final int maxDaysPerMonth; - final Multimap daysInMonth; + final Multimap daysInMonths; + final int hash; + + static Date d = new Date(1900 - 1900, 0, 1, 0, 0, 0); + + public Footprint(CalType calType) { + if (calType.equals(CalType.CHINESE)) { + int debug = 0; + } + Calendar cal2 = makeCalendar(calType, TimeZone.GMT_ZONE, ULocale.ENGLISH); + + // HACK to get the right maximum number of months per year + // Two calendar systems don't make visible the Nth month of the year, + // and rather double up the number with a special affix. + // Plus, they do this inconsistently - static Date d = new Date(2000 - 1900, 0, 1, 0, 0, 0); + int hack = cal2.getMaximum(Calendar.MONTH) + 1; + final boolean isChineseCalendarBased = cal2 instanceof ChineseCalendar; + final boolean isHebrewCalendarBased = cal2 instanceof HebrewCalendar; + if (isChineseCalendarBased) { + // Chinese Calendar does not allow access to the max number of months per year, + // just the highest month number. + hack += 1; + // Hebrew does allow access to the max number of months per year, + // but not the current number of months per year. 
+ // That requires a further hack + } - public Footprint(Calendar cal2) { - maxMonths = cal2.getMaximum(Calendar.MONTH) + 1; + maxMonthsPerYear = hack; maxDaysPerMonth = cal2.getMaximum(Calendar.DAY_OF_MONTH); - final Multimap _daysInMonth = TreeMultimap.create(); + maxDaysPerYear = cal2.getMaximum(Calendar.DAY_OF_YEAR); + final Set _monthsInYear = new TreeSet<>(); + final Set _daysInYear = new TreeSet<>(); + final Multimap _daysInMonths = TreeMultimap.create(); cal2.setTime(d); // year may not be gregorian final int startYear = cal2.get(Calendar.YEAR); - cal2.set(Calendar.DAY_OF_MONTH, 1); - for (int year = startYear; year < startYear + 100; ++year) { - cal2.set(Calendar.YEAR, year); - for (int month = 1; month <= maxMonths; ++month) { - cal2.set(Calendar.MONTH, month - 1); - _daysInMonth.put(month, cal2.getActualMaximum(Calendar.DAY_OF_MONTH)); + int currYear = startYear; + + for (int year = startYear; year < startYear + 5; ++year) { + cal2.set(Calendar.DAY_OF_MONTH, 1); + cal2.set(Calendar.MONTH, 0); + + final int currDaysPerYear = cal2.getActualMaximum(Calendar.DAY_OF_YEAR); + int currMonthsInYear = cal2.getActualMaximum(Calendar.MONTH) + 1; + + // Compensate for the Chinese & Hebrew Calendars not returning the actual number of + // months in a year + if (isChineseCalendarBased && currDaysPerYear >= 365) { + currMonthsInYear += 1; + } else if (isHebrewCalendarBased && currDaysPerYear < 365) { + currMonthsInYear -= 1; + } + + _monthsInYear.add(currMonthsInYear); + _daysInYear.add(currDaysPerYear); + int daysLeft = currDaysPerYear; + + // use clunky method because some months in Chinese / Hebrew share a numeric value + // the 'month' variable is the nth month in the year, NOT the month with that number + for (int month = 0; ; ++month) { + int daysInThisMonth = cal2.getActualMaximum(Calendar.DAY_OF_MONTH); + _daysInMonths.put(month + 1, daysInThisMonth); + daysLeft -= daysInThisMonth; + + int oldMonth = cal2.get(Calendar.MONTH); + cal2.add(Calendar.MONTH, 1); + if 
(cal2.get(Calendar.MONTH) < oldMonth) { // we wrapped around + if (daysLeft != 0) { + // special hack for coptic, etc. + System.out.println(calType + " " + (month + 1) + " " + daysLeft); + } + break; + } } } - Set maxSingleton = ImmutableSet.of(maxDaysPerMonth); + // make immutable // filter out cases where the month has only the max - final Multimap _daysInMonth2 = TreeMultimap.create(); - _daysInMonth.asMap().values().removeIf(value -> value.equals(maxSingleton)); - daysInMonth = CldrUtility.protectCollection(_daysInMonth); + Set maxSingleton = ImmutableSet.of(maxDaysPerMonth); + _daysInMonths.asMap().values().removeIf(value -> value.equals(maxSingleton)); + + daysInYear = ImmutableSet.copyOf(_daysInYear); + monthsInYear = ImmutableSet.copyOf(_monthsInYear); + daysInMonths = CldrUtility.protectCollection(_daysInMonths); + hash = + Objects.hash( + maxMonthsPerYear, + maxDaysPerMonth, + maxDaysPerYear, + monthsInYear, + daysInYear, + daysInMonths); } @Override @@ -104,24 +176,38 @@ public boolean equals(Object obj) { @Override public int hashCode() { - return Objects.hash(maxMonths, maxDaysPerMonth); + return hash; } @Override public int compareTo(Footprint o) { return ComparisonChain.start() - .compare(maxMonths, o.maxMonths) + // single fields first + .compare(maxDaysPerYear, o.maxDaysPerYear) + .compare(maxMonthsPerYear, o.maxMonthsPerYear) .compare(maxDaysPerMonth, o.maxDaysPerMonth) - .compare(daysInMonth.entries(), o.daysInMonth.entries(), LIST_ENTRY_COMP) + // then structures + .compare(daysInYear, o.daysInYear, LEX_NATURAL_INTEGER) + .compare(monthsInYear, o.monthsInYear, LEX_NATURAL_INTEGER) + .compare(daysInMonths.entries(), o.daysInMonths.entries(), LIST_ENTRY_COMP) .result(); } @Override public String toString() { - return "{" + Joiner.on(", ").join(maxMonths, maxDaysPerMonth, daysInMonth) + "}"; + return "{" + + Joiner.on("\t") + .join( // + "maxDpY: ", maxDaysPerYear, // + "maxMpY: ", maxMonthsPerYear, // + "maxDpMs: ", maxDaysPerMonth, + "dpY: ", 
daysInYear, + "mpY: ", monthsInYear, + "dpMs: ", daysInMonths) + + "}"; } - static Comparator> ENTRY_COMP = + private static final Comparator> ENTRY_COMP = new Comparator<>() { @Override public int compare(Entry o1, Entry o2) { @@ -131,8 +217,10 @@ public int compare(Entry o1, Entry o2) { .result(); } }; - static Comparator>> LIST_ENTRY_COMP = + private static final Comparator>> LIST_ENTRY_COMP = Comparators.lexicographical(ENTRY_COMP); + private static final Comparator> LEX_NATURAL_INTEGER = + Comparators.lexicographical(Comparator.naturalOrder()); } // This is not visible in ICU, so cloning here From f2852fc8b63f2b46e366451c457d85754731569e Mon Sep 17 00:00:00 2001 From: macchiati Date: Sat, 13 Jul 2024 15:37:20 -0700 Subject: [PATCH 3/4] CLDR-8823 Make data easier to read --- .../unicode/cldr/tool/ShowCalendarGroups.java | 74 ++++++++++++------- 1 file changed, 49 insertions(+), 25 deletions(-) diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowCalendarGroups.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowCalendarGroups.java index be39b2af419..d644bd0d624 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowCalendarGroups.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowCalendarGroups.java @@ -1,17 +1,5 @@ package org.unicode.cldr.tool; -import java.util.Collection; -import java.util.Comparator; -import java.util.Date; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Objects; -import java.util.Set; -import java.util.TreeSet; -import java.util.stream.Collectors; - -import org.unicode.cldr.util.CldrUtility; - import com.google.common.base.Joiner; import com.google.common.collect.Comparators; import com.google.common.collect.ComparisonChain; @@ -34,6 +22,18 @@ import com.ibm.icu.util.TaiwanCalendar; import com.ibm.icu.util.TimeZone; import com.ibm.icu.util.ULocale; +import java.util.Collection; +import java.util.Comparator; +import java.util.Date; +import java.util.List; 
+import java.util.Map; +import java.util.Map.Entry; +import java.util.Objects; +import java.util.Set; +import java.util.TreeSet; +import java.util.function.Function; +import java.util.stream.Collectors; +import org.unicode.cldr.util.CldrUtility; /** Utility to compute calendar groups based on months & days per month */ public class ShowCalendarGroups { @@ -60,16 +60,43 @@ public static void main(String[] args) { for (CalType calType : CalType.values()) { footprintToCalendar.put(new Footprint(calType), calType); } - System.out.println(); + System.out.println( + "Calendars\tmax days/year\tmax months/year\tmax days/month\tdays/year\tmonths/year\tdays/month"); for (Entry> entry : footprintToCalendar.asMap().entrySet()) { System.out.println( - entry.getValue().stream().map(x -> x.getId()).collect(Collectors.toList()) + entry.getValue().stream().map(x -> x.getId()).collect(Collectors.joining(" ")) + "\t" + entry.getKey()); } } + public static class MultimapJoiner { + final Joiner entriesJoiner; + final Joiner entryJoiner; + final Joiner entryValueJoiner; + + public MultimapJoiner(Joiner entriesJoiner, Joiner entryJoiner, Joiner entryValueJoiner) { + this.entriesJoiner = entriesJoiner; + this.entryJoiner = entryJoiner; + this.entryValueJoiner = entryValueJoiner; + } + + public String join(Multimap multimap) { + Function>, String> fii = + x -> entryJoiner.join(x.getKey(), entryValueJoiner.join(x.getValue())); + List list = + multimap.asMap().entrySet().stream().map(fii).collect(Collectors.toList()); + return entriesJoiner.join(list); + } + } + static class Footprint implements Comparable { + private static final Joiner SPACE_JOINER = Joiner.on(' '); + private static final Joiner EQ_JOINER = Joiner.on('='); + private static final Joiner COMMA_JOINER = Joiner.on(','); + private static final Joiner TAB_JOINER = Joiner.on("\t"); + private static final MultimapJoiner MM_JOINER = + new MultimapJoiner(SPACE_JOINER, EQ_JOINER, COMMA_JOINER); final int maxMonthsPerYear; final Set 
monthsInYear; final int maxDaysPerYear; @@ -145,7 +172,7 @@ public Footprint(CalType calType) { if (cal2.get(Calendar.MONTH) < oldMonth) { // we wrapped around if (daysLeft != 0) { // special hack for coptic, etc. - System.out.println(calType + " " + (month + 1) + " " + daysLeft); + System.out.println(calType + " " + (month + 1) + " " + daysLeft); } break; } @@ -195,16 +222,13 @@ public int compareTo(Footprint o) { @Override public String toString() { - return "{" - + Joiner.on("\t") - .join( // - "maxDpY: ", maxDaysPerYear, // - "maxMpY: ", maxMonthsPerYear, // - "maxDpMs: ", maxDaysPerMonth, - "dpY: ", daysInYear, - "mpY: ", monthsInYear, - "dpMs: ", daysInMonths) - + "}"; + return TAB_JOINER.join( + maxDaysPerYear, + maxMonthsPerYear, + maxDaysPerMonth, + SPACE_JOINER.join(daysInYear), + SPACE_JOINER.join(monthsInYear), + MM_JOINER.join(daysInMonths)); } private static final Comparator> ENTRY_COMP = From 317478833bbdadc4422b376fb58f51280e176881 Mon Sep 17 00:00:00 2001 From: macchiati Date: Sat, 13 Jul 2024 15:53:13 -0700 Subject: [PATCH 4/4] CLDR-8823 I'd set the number of years down to 5 for debugging; this restores it. --- .../src/main/java/org/unicode/cldr/tool/ShowCalendarGroups.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowCalendarGroups.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowCalendarGroups.java index d644bd0d624..546e525233a 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowCalendarGroups.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowCalendarGroups.java @@ -141,7 +141,7 @@ public Footprint(CalType calType) { final int startYear = cal2.get(Calendar.YEAR); int currYear = startYear; - for (int year = startYear; year < startYear + 5; ++year) { + for (int year = startYear; year < startYear + 1000; ++year) { cal2.set(Calendar.DAY_OF_MONTH, 1); cal2.set(Calendar.MONTH, 0);