From c0001a269329f96a3055b0600934423d54b0231d Mon Sep 17 00:00:00 2001 From: Tom Bishop Date: Wed, 20 Nov 2024 15:12:50 -0500 Subject: [PATCH] CLDR-7428 Freeze collators; new class CollatorHelper (#4207) --- .../unicode/cldr/unittest/web/TestAll.java | 69 ------------------- .../java/org/unicode/cldr/draft/Misc.java | 7 +- .../org/unicode/cldr/icu/ExtractICUData.java | 12 +--- .../org/unicode/cldr/icu/LDMLComparator.java | 9 +-- .../org/unicode/cldr/tool/CLDRModify.java | 4 +- .../org/unicode/cldr/tool/ChartCollation.java | 2 - .../cldr/tool/ChartLanguageGroups.java | 8 +-- .../unicode/cldr/tool/CheckAnnotations.java | 6 +- .../org/unicode/cldr/tool/CompareData.java | 9 --- .../org/unicode/cldr/tool/CompareEmoji.java | 9 +-- .../cldr/tool/ConvertLanguageData.java | 10 +-- .../org/unicode/cldr/tool/CountItems.java | 8 +-- .../unicode/cldr/tool/ExtractMessages.java | 3 +- .../cldr/tool/FormattedFileWriter.java | 5 +- .../unicode/cldr/tool/GenerateComparison.java | 3 +- .../cldr/tool/GenerateSidewaysView.java | 18 ++--- .../unicode/cldr/tool/GenerateStatistics.java | 5 +- .../cldr/tool/GenerateTransformCharts.java | 12 ++-- .../unicode/cldr/tool/MakeTransliterator.java | 3 +- .../main/java/org/unicode/cldr/tool/Misc.java | 4 +- .../java/org/unicode/cldr/tool/ShowData.java | 9 --- .../org/unicode/cldr/tool/TablePrinter.java | 3 +- .../org/unicode/cldr/util/CLDRConfig.java | 35 +--------- .../unicode/cldr/util/CollationMapMaker.java | 3 +- .../org/unicode/cldr/util/CollatorHelper.java | 67 ++++++++++++++++++ .../java/org/unicode/cldr/util/DtdData.java | 2 +- .../java/org/unicode/cldr/util/Emoji.java | 2 +- .../org/unicode/cldr/util/MapComparator.java | 19 +---- .../cldr/util/ReferenceStringSearch.java | 3 +- .../cldr/util/SimpleUnicodeSetFormatter.java | 2 +- .../org/unicode/cldr/util/TestUtilities.java | 7 +- .../cldr/util/UnicodeSetPrettyPrinter.java | 8 +-- .../cldr/util/VerifyCompactNumbers.java | 2 +- .../org/unicode/cldr/util/VerifyZones.java | 2 +- .../org/unicode/cldr/util/VettingViewer.java | 2 +- .../org/unicode/cldr/util/VoteResolver.java | 5 +- .../TestCollationStringByteConverter.java | 6 +- .../org/unicode/cldr/unittest/TestHelper.java | 6 +- .../unittest/TestReferenceStringSearch.java | 10 +-- 39 files changed, 138 insertions(+), 261 deletions(-) create mode 100644 tools/cldr-code/src/main/java/org/unicode/cldr/util/CollatorHelper.java diff --git a/tools/cldr-apps/src/test/java/org/unicode/cldr/unittest/web/TestAll.java b/tools/cldr-apps/src/test/java/org/unicode/cldr/unittest/web/TestAll.java index f2c65735bb4..5fdaea6cbb8 100644 --- a/tools/cldr-apps/src/test/java/org/unicode/cldr/unittest/web/TestAll.java +++ b/tools/cldr-apps/src/test/java/org/unicode/cldr/unittest/web/TestAll.java @@ -7,8 +7,6 @@ import com.ibm.icu.dev.test.TestFmwk.TestGroup; import com.ibm.icu.dev.test.TestLog; -import com.ibm.icu.text.Collator; -import com.ibm.icu.text.RuleBasedCollator; import java.io.File; import java.io.PrintWriter; import java.sql.SQLException; @@ -16,12 +14,8 @@ import org.unicode.cldr.test.CheckCLDR; import org.unicode.cldr.util.CLDRConfig; import org.unicode.cldr.util.CLDRConfig.Environment; -import org.unicode.cldr.util.CLDRFile; import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.CldrUtility; -import org.unicode.cldr.util.Factory; -import org.unicode.cldr.util.StandardCodes; -import org.unicode.cldr.util.SupplementalDataInfo; import org.unicode.cldr.web.CLDRProgressIndicator; import org.unicode.cldr.web.DBUtils; import org.unicode.cldr.web.SurveyLog; @@ -128,13 +122,6 @@ public TestAll() { public static class WebTestInfo { private static WebTestInfo INSTANCE = null; - private SupplementalDataInfo supplementalDataInfo; - private StandardCodes sc; - private Factory cldrFactory; - private CLDRFile english; - private CLDRFile root; - private RuleBasedCollator col; - public static WebTestInfo getInstance() { synchronized (WebTestInfo.class) { if (INSTANCE == null) { @@ -145,62 +132,6 @@ public static WebTestInfo getInstance() { } private WebTestInfo() {} - - public SupplementalDataInfo getSupplementalDataInfo() { - synchronized (this) { - if (supplementalDataInfo == null) { - supplementalDataInfo = - SupplementalDataInfo.getInstance(CLDRPaths.SUPPLEMENTAL_DIRECTORY); - } - } - return supplementalDataInfo; - } - - public StandardCodes getStandardCodes() { - synchronized (this) { - if (sc == null) { - sc = StandardCodes.make(); - } - } - return sc; - } - - public Factory getCldrFactory() { - synchronized (this) { - if (cldrFactory == null) { - cldrFactory = Factory.make(CLDRPaths.MAIN_DIRECTORY, ".*"); - } - } - return cldrFactory; - } - - public CLDRFile getEnglish() { - synchronized (this) { - if (english == null) { - english = getCldrFactory().make("en", true); - } - } - return english; - } - - public CLDRFile getRoot() { - synchronized (this) { - if (root == null) { - root = getCldrFactory().make("root", true); - } - } - return root; - } - - public Collator getCollator() { - synchronized (this) { - if (col == null) { - col = (RuleBasedCollator) Collator.getInstance(); - col.setNumericCollation(true); - } - } - return col; - } } static boolean dbSetup = false; diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/draft/Misc.java b/tools/cldr-code/src/main/java/org/unicode/cldr/draft/Misc.java index 04f964c8ee0..208b904941e 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/draft/Misc.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/draft/Misc.java @@ -36,6 +36,7 @@ import org.unicode.cldr.util.CLDRFile; import org.unicode.cldr.util.CLDRFile.WinningChoice; import org.unicode.cldr.util.CLDRPaths; +import org.unicode.cldr.util.CollatorHelper; import org.unicode.cldr.util.Factory; import org.unicode.cldr.util.LanguageTagParser; import org.unicode.cldr.util.LocaleIDParser; @@ -143,7 +144,9 @@ private static void showDefaultContent(String... strings) { private static void showSortKey() { String[] tests = "a ä A ぁ あ ァ ァ ア ア ㋐".split(" "); - RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH); + // TODO: freeze the Collator; problematic since changed in innermost for loop below + // Reference: https://unicode-org.atlassian.net/browse/CLDR-7428 + RuleBasedCollator c = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); c.setStrength(RuleBasedCollator.QUATERNARY); c.setCaseLevel(true); c.setHiraganaQuaternary(true); @@ -319,7 +322,7 @@ private static void showExemplarSize() { String[] locales = "en ru nl en-GB fr de it pl pt-BR es tr th ja zh-CN zh-TW ko ar bg sr uk ca hr cs da fil fi hu id lv lt no pt-PT ro sk sl es-419 sv vi el iw fa hi am af et is ms sw zu bn mr ta eu fr-CA gl zh-HK ur gu kn ml te" .split(" "); - Set nameAndInfo = new TreeSet<>(info.getCollator()); + Set nameAndInfo = new TreeSet<>(CollatorHelper.EMOJI_COLLATOR); for (String localeCode : locales) { String baseLanguage = ltp.set(localeCode).getLanguage(); R2, String> temp = lang2replacement.get(baseLanguage); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/icu/ExtractICUData.java b/tools/cldr-code/src/main/java/org/unicode/cldr/icu/ExtractICUData.java index d436c0bc965..f9ee6673215 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/icu/ExtractICUData.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/icu/ExtractICUData.java @@ -12,9 +12,7 @@ import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UProperty; import com.ibm.icu.text.Collator; -import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.text.Transliterator; -import com.ibm.icu.util.ULocale; import com.ibm.icu.util.UResourceBundle; import java.io.BufferedReader; import java.io.File; @@ -35,6 +33,7 @@ import org.unicode.cldr.util.CLDRFile; import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.CldrUtility; +import org.unicode.cldr.util.CollatorHelper; import org.unicode.cldr.util.PathUtilities; import org.unicode.cldr.util.PatternCache; import org.unicode.cldr.util.SimpleFactory; @@ -411,8 +410,7 @@ static void testProps() { {UProperty.DOUBLE_START, UProperty.DOUBLE_START}, {UProperty.STRING_START, UProperty.STRING_LIMIT}, }; - Collator col = Collator.getInstance(ULocale.ROOT); - ((RuleBasedCollator) col).setNumericCollation(true); + Collator col = CollatorHelper.ROOT_NUMERIC; Map> alpha = new TreeMap<>(col); for (int range = 0; range < ranges.length; ++range) { @@ -465,12 +463,6 @@ static void testProps() { } out.println(""); } - Collator c = Collator.getInstance(ULocale.ENGLISH); - ((RuleBasedCollator) c).setNumericCollation(true); - - // int enumValue = UCharacter.getIntPropertyValue(codePoint, propEnum); - // return UCharacter.getPropertyValueName(propEnum,enumValue, (int)nameChoice); - } private static String getName(int index, String valueName, String shortValueName) { diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/icu/LDMLComparator.java b/tools/cldr-code/src/main/java/org/unicode/cldr/icu/LDMLComparator.java index 4bb2b6d1e10..c8acf50117c 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/icu/LDMLComparator.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/icu/LDMLComparator.java @@ -13,7 +13,6 @@ import com.ibm.icu.text.Collator; import com.ibm.icu.text.DecimalFormat; import com.ibm.icu.text.Normalizer; -import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.util.ULocale; import java.io.File; import java.io.FileOutputStream; @@ -28,6 +27,7 @@ import java.util.TreeMap; import java.util.TreeSet; import java.util.Vector; +import org.unicode.cldr.util.CollatorHelper; import org.unicode.cldr.util.LDMLUtilities; import org.w3c.dom.Document; import org.w3c.dom.NamedNodeMap; @@ -114,12 +114,7 @@ public static void main(String[] args) { } static Collator getDefaultCollation() { - // if (DEFAULT_COLLATION != null) return DEFAULT_COLLATION; - RuleBasedCollator temp = (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH); - temp.setStrength(Collator.IDENTICAL); - temp.setNumericCollation(true); - // DEFAULT_COLLATION = temp; - return temp; + return CollatorHelper.ROOT_NUMERIC_IDENTICAL; } Hashtable optionTable = new Hashtable<>(); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CLDRModify.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CLDRModify.java index a76bfbc9c87..c3f7317d03b 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CLDRModify.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CLDRModify.java @@ -55,6 +55,7 @@ import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.CLDRTool; import org.unicode.cldr.util.CldrUtility; +import org.unicode.cldr.util.CollatorHelper; import org.unicode.cldr.util.DateTimeCanonicalizer; import org.unicode.cldr.util.DateTimeCanonicalizer.DateTimePatternType; import org.unicode.cldr.util.DowngradePaths; @@ -3309,8 +3310,7 @@ private static int stepsFromRoot(String origLoc) { /** Internal */ public static void testJavaSemantics() { - Collator caseInsensitive = Collator.getInstance(ULocale.ROOT); - caseInsensitive.setStrength(Collator.SECONDARY); + Collator caseInsensitive = CollatorHelper.ROOT_SECONDARY; Set setWithCaseInsensitive = new TreeSet<>(caseInsensitive); setWithCaseInsensitive.addAll(Arrays.asList(new String[] {"a", "b", "c"})); Set plainSet = new TreeSet<>(); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartCollation.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartCollation.java index b97bf06d9fc..425cf126feb 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartCollation.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartCollation.java @@ -258,8 +258,6 @@ private void addCollator(Map data, String type, RuleBasedCollator dataItem.collator = col; } - // RuleBasedCollator ROOT = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); - private class Subchart extends Chart { private static final String HIGH_COLLATION_PRIMARY = "\uFFFF"; String title; diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartLanguageGroups.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartLanguageGroups.java index 086dbaebe6c..a7d4d8e5ff2 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartLanguageGroups.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartLanguageGroups.java @@ -3,8 +3,6 @@ import com.google.common.collect.ImmutableSet; import com.google.common.collect.ImmutableSet.Builder; import com.google.common.collect.Multimap; -import com.ibm.icu.text.Collator; -import com.ibm.icu.util.ULocale; import java.io.IOException; import java.util.Collection; import java.util.Comparator; @@ -76,8 +74,6 @@ public String getExplanation() { + "The data doesn't completely match wikipedia’s; there are some patches for CLDR languages.

\n"; } - Collator ENGLISH_ORDER = Collator.getInstance(ULocale.ENGLISH); - @Override public void writeContents(FormattedFileWriter pw) throws IOException { @@ -112,7 +108,9 @@ private void show(Multimap lg, String parent, TablePrinter table new Comparator>() { @Override public int compare(Pair o1, Pair o2) { - int diff = ENGLISH_ORDER.compare(o1.getFirst(), o2.getFirst()); + int diff = + CollatorHelper.ROOT_COLLATOR.compare( + o1.getFirst(), o2.getFirst()); if (diff != 0) { return diff; } diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CheckAnnotations.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CheckAnnotations.java index 16e640dcbdd..cdb25c9ea4e 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CheckAnnotations.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CheckAnnotations.java @@ -4,14 +4,12 @@ import java.util.TreeSet; import org.unicode.cldr.util.Annotations; import org.unicode.cldr.util.Annotations.AnnotationSet; -import org.unicode.cldr.util.CLDRConfig; +import org.unicode.cldr.util.CollatorHelper; public class CheckAnnotations { public static void main(String[] args) { AnnotationSet data = Annotations.getDataSet("en"); - CLDRConfig config = CLDRConfig.getInstance(); - // UnicodeMap data2 = Annotations.getData("de"); - Set sorted = new TreeSet<>(config.getCollator()); + Set sorted = new TreeSet<>(CollatorHelper.EMOJI_COLLATOR); int i = 0; boolean needMore = true; diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CompareData.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CompareData.java index a911228360d..a170825defd 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CompareData.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CompareData.java @@ -1,9 +1,6 @@ package org.unicode.cldr.tool; import com.ibm.icu.dev.util.UOption; -import com.ibm.icu.text.Collator; -import com.ibm.icu.text.RuleBasedCollator; -import com.ibm.icu.util.ULocale; import java.util.HashSet; import java.util.Iterator; import java.util.Set; @@ -30,12 +27,6 @@ public class CompareData { String[] directoryList = {"main", "collation", "segmentations"}; - static RuleBasedCollator uca = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); - - { - uca.setNumericCollation(true); - } - static PrettyPath prettyPathMaker = new PrettyPath(); static CLDRFile english; static Set locales; diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CompareEmoji.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CompareEmoji.java index ab807fd4067..3e5693f9398 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CompareEmoji.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CompareEmoji.java @@ -4,7 +4,6 @@ import com.google.common.base.Splitter; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; -import com.ibm.icu.text.Collator; import java.io.BufferedReader; import java.io.File; import java.io.IOException; @@ -16,6 +15,7 @@ import org.unicode.cldr.util.CLDRConfig; import org.unicode.cldr.util.CLDRFile; import org.unicode.cldr.util.CLDRPaths; +import org.unicode.cldr.util.CollatorHelper; import org.unicode.cldr.util.Emoji; import org.unicode.cldr.util.Factory; import org.unicode.cldr.util.SimpleFactory; @@ -29,11 +29,12 @@ public class CompareEmoji { static final Factory FACTORY_DERIVED = SimpleFactory.make(paths, ".*"); private static final Joiner BAR_JOINER = Joiner.on(" | "); - private static final Collator collator = CLDRConfig.getInstance().getCollator(); + private static final String base = "/Users/markdavis/github/private/DATA/cldr-private/emoji_diff/"; private static final Set sorted = - ImmutableSet.copyOf(Emoji.getAllRgi().addAllTo(new TreeSet<>(collator))); + ImmutableSet.copyOf( + Emoji.getAllRgi().addAllTo(new TreeSet<>(CollatorHelper.EMOJI_COLLATOR))); enum Status { regular, @@ -155,7 +156,7 @@ public static Map> loadItems( continue; } String key = split[0]; - Set values = new TreeSet<>(collator); + Set values = new TreeSet<>(CollatorHelper.EMOJI_COLLATOR); for (int i = 1; i < split.length; ++i) { values.add(split[i]); } diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ConvertLanguageData.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ConvertLanguageData.java index 63607e5be21..08e61d4f566 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ConvertLanguageData.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ConvertLanguageData.java @@ -6,9 +6,7 @@ import com.ibm.icu.impl.Relation; import com.ibm.icu.impl.Row; import com.ibm.icu.impl.Row.R2; -import com.ibm.icu.text.Collator; import com.ibm.icu.text.NumberFormat; -import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.text.UTF16; import com.ibm.icu.util.ULocale; import java.io.BufferedReader; @@ -42,6 +40,7 @@ import org.unicode.cldr.util.CLDRFile; import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.CldrUtility; +import org.unicode.cldr.util.CollatorHelper; import org.unicode.cldr.util.Factory; import org.unicode.cldr.util.Iso639Data; import org.unicode.cldr.util.Iso639Data.Scope; @@ -1924,11 +1923,6 @@ public String toString() { public static class GeneralCollator implements Comparator { static UTF16.StringComparator cpCompare = new UTF16.StringComparator(true, false, 0); - static RuleBasedCollator UCA = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); - - static { - UCA.setNumericCollation(true); - } @Override public int compare(String s1, String s2) { @@ -1937,7 +1931,7 @@ public int compare(String s1, String s2) { } else if (s2 == null) { return 1; } - int result = UCA.compare(s1, s2); + int result = CollatorHelper.ROOT_NUMERIC.compare(s1, s2); if (result != 0) return result; return cpCompare.compare(s1, s2); } diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CountItems.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CountItems.java index 1912babd5c5..a0200b9ad3c 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CountItems.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/CountItems.java @@ -19,7 +19,6 @@ import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.text.Transform; import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.util.ULocale; import java.io.BufferedReader; import java.io.File; import java.io.IOException; @@ -45,6 +44,7 @@ import org.unicode.cldr.util.CLDRFile; import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.CldrUtility; +import org.unicode.cldr.util.CollatorHelper; import org.unicode.cldr.util.Factory; import org.unicode.cldr.util.Iso639Data; import org.unicode.cldr.util.IsoCurrencyParser; @@ -66,8 +66,7 @@ /** Simple program to count the amount of data in CLDR. Internal Use. */ public class CountItems { - private static final Collator ROOT_PRIMARY_COLLATOR = - Collator.getInstance(ULocale.ROOT).setStrength2(Collator.PRIMARY); + private static final Collator ROOT_PRIMARY_COLLATOR = CollatorHelper.ROOT_PRIMARY; static final String needsTranslationString = "America/Buenos_Aires " // America/Rio_Branco @@ -318,8 +317,7 @@ public static void genSupplementalZoneData() throws IOException { } public static void genSupplementalZoneData(boolean skipUnaliased) throws IOException { - RuleBasedCollator col = (RuleBasedCollator) Collator.getInstance(); - col.setNumericCollation(true); + RuleBasedCollator col = CollatorHelper.ROOT_NUMERIC; StandardCodes sc = StandardCodes.make(); Map zone_country = sc.getZoneToCounty(); Map> country_zone = sc.getCountryToZoneSet(); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ExtractMessages.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ExtractMessages.java index 9db2199884c..3fd97cc8c16 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ExtractMessages.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ExtractMessages.java @@ -205,10 +205,11 @@ public void handlePathValue(String path, String value) { } } - public static Collator col = Collator.getInstance(ULocale.ENGLISH); + public static Collator col = Collator.getInstance(ULocale.ROOT); // freeze below static { col.setStrength(Collator.SECONDARY); + col = col.freeze(); } private static OtherHandler otherHandler = new OtherHandler(); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/FormattedFileWriter.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/FormattedFileWriter.java index e02fec0b9ce..50f389630bb 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/FormattedFileWriter.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/FormattedFileWriter.java @@ -2,7 +2,6 @@ import com.ibm.icu.text.Collator; import com.ibm.icu.util.ICUUncheckedIOException; -import com.ibm.icu.util.ULocale; import java.io.IOException; import java.io.PrintWriter; import java.io.StringWriter; @@ -16,11 +15,11 @@ import org.unicode.cldr.util.ArrayComparator; import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.CldrUtility; +import org.unicode.cldr.util.CollatorHelper; public class FormattedFileWriter extends java.io.Writer { public static final String CHART_TARGET_DIR = CLDRPaths.CHART_DIRECTORY + "/supplemental/"; - public static final Collator COL = - Collator.getInstance(ULocale.ROOT).setStrength2(Collator.IDENTICAL); + public static final Collator COL = CollatorHelper.ROOT_IDENTICAL; // public static final PairComparator PC = new PairComparator(COL, null); public static final ArrayComparator PC = new ArrayComparator(COL); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateComparison.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateComparison.java index 9bb41380744..4019c5ba1b8 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateComparison.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateComparison.java @@ -17,6 +17,7 @@ import org.unicode.cldr.util.CLDRFile.Status; import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.CldrUtility; +import org.unicode.cldr.util.CollatorHelper; import org.unicode.cldr.util.Counter; import org.unicode.cldr.util.EscapingUtilities; import org.unicode.cldr.util.Factory; @@ -29,7 +30,7 @@ public class GenerateComparison { private static PrettyPath prettyPathMaker; - private static Collator collator = Collator.getInstance(); + private static Collator collator = CollatorHelper.ROOT_COLLATOR; static class EnglishRowComparator implements Comparator> { private static Comparator unicode = new UTF16.StringComparator(true, false, 0); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateSidewaysView.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateSidewaysView.java index 99d76e44af3..7166e514950 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateSidewaysView.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateSidewaysView.java @@ -17,7 +17,6 @@ import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UScript; import com.ibm.icu.text.BreakIterator; -import com.ibm.icu.text.Collator; import com.ibm.icu.text.Normalizer; import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.text.RuleBasedNumberFormat; @@ -48,6 +47,7 @@ import org.unicode.cldr.util.CLDRFile.Status; import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.CldrUtility; +import org.unicode.cldr.util.CollatorHelper; import org.unicode.cldr.util.DtdData; import org.unicode.cldr.util.DtdData.Attribute; import org.unicode.cldr.util.DtdData.AttributeStatus; @@ -141,24 +141,16 @@ static int getFirstScript(UnicodeSet exemplars) { static Comparator UCA; static { - RuleBasedCollator UCA2 = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); - UCA2.setNumericCollation(true); - UCA2.setStrength(Collator.IDENTICAL); UCA = new org.unicode.cldr.util.MultiComparator( - UCA2, new UTF16.StringComparator(true, false, 0)); + CollatorHelper.ROOT_NUMERIC_IDENTICAL, + new UTF16.StringComparator(true, false, 0)); } private static Map>> path_value_locales = new TreeMap<>(); private static long startTime = System.currentTimeMillis(); - static RuleBasedCollator standardCollation = - (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH); - - static { - standardCollation.setStrength(Collator.IDENTICAL); - standardCollation.setNumericCollation(true); - } + static RuleBasedCollator standardCollation = CollatorHelper.ROOT_NUMERIC_IDENTICAL; private static CLDRFile english; // private static DataShower dataShower = new DataShower(); @@ -678,7 +670,7 @@ private static String cleanLocale(String item, boolean name) { // .setCompressRanges(true) // .setToQuote(ALL_CHARS) // .setQuoter(MyTransform) - // .format(lastChars); + // .format(lastChars).freeze(); // exemplarsWithoutBrackets = exemplarsWithoutBrackets.substring(1, // exemplarsWithoutBrackets.length() - 1); // return exemplarsWithoutBrackets; diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateStatistics.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateStatistics.java index b3115ee26b0..79c722f6261 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateStatistics.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateStatistics.java @@ -26,6 +26,7 @@ import org.unicode.cldr.util.ArrayComparator; import org.unicode.cldr.util.CLDRFile; import org.unicode.cldr.util.CldrUtility; +import org.unicode.cldr.util.CollatorHelper; import org.unicode.cldr.util.Factory; import org.unicode.cldr.util.LanguageTagParser; import org.unicode.cldr.util.Log; @@ -40,7 +41,7 @@ class GenerateStatistics { static CLDRFile english; static Factory factory; static LanguageTagParser ltp = new LanguageTagParser(); - static Collator col = Collator.getInstance(ULocale.ENGLISH); + static Collator col = CollatorHelper.ROOT_COLLATOR; static boolean notitlecase = true; public static void generateSize( @@ -344,7 +345,7 @@ private static void addCounts( private static class LanguageList implements Comparable { Object[] contents; - static Collator col = Collator.getInstance(ULocale.ENGLISH); + static Collator col = CollatorHelper.ROOT_COLLATOR; static Comparator comp = new ArrayComparator(new Collator[] {col, col, null}); LanguageList(String locale, String englishName, String localName) { diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateTransformCharts.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateTransformCharts.java index a30d01372c9..39a5c8fda44 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateTransformCharts.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/GenerateTransformCharts.java @@ -10,14 +10,11 @@ import com.google.common.collect.TreeMultimap; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UScript; -import com.ibm.icu.text.Collator; import com.ibm.icu.text.Normalizer; -import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.text.Transliterator; import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.text.UnicodeSetIterator; -import com.ibm.icu.util.ULocale; import java.io.File; import java.io.IOException; import java.io.PrintWriter; @@ -38,6 +35,7 @@ import org.unicode.cldr.util.CLDRTransforms.ParsedTransformID; import org.unicode.cldr.util.CLDRURLS; import org.unicode.cldr.util.CldrUtility; +import org.unicode.cldr.util.CollatorHelper; import org.unicode.cldr.util.FileCopier; import org.unicode.cldr.util.Pair; import org.unicode.cldr.util.TransliteratorUtilities; @@ -119,7 +117,7 @@ public static void main(String[] args) throws IOException { // Transliterator anyToLatin = Transliterator.getInstance("any-latin"); // // UnicodeSet failNorm = new UnicodeSet(); - // // Collator sc = Collator.getInstance(ULocale.ENGLISH); + // // Collator sc = Collator.getInstance(ULocale.ROOT).freeze(); // // sc.setStrength(Collator.IDENTICAL); // Comparator sc = new UTF16.StringComparator(true, false, 0); // Set latinFail = new TreeSet(new ArrayComparator(new Comparator[] { sc, sc, sc })); @@ -515,12 +513,10 @@ private static boolean isLatin(String target) { static Comparator UCA; static { - RuleBasedCollator UCA2 = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); - UCA2.setNumericCollation(true); - UCA2.setStrength(Collator.IDENTICAL); UCA = new org.unicode.cldr.util.MultiComparator( - UCA2, new UTF16.StringComparator(true, false, 0)); + CollatorHelper.ROOT_NUMERIC_IDENTICAL, + new UTF16.StringComparator(true, false, 0)); } private static void showLatin(Pair scriptChoice, Set targetVariant) diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/MakeTransliterator.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/MakeTransliterator.java index cc8f053e63a..a7774227d8c 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/MakeTransliterator.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/MakeTransliterator.java @@ -23,6 +23,7 @@ import java.util.TreeSet; import org.unicode.cldr.draft.FileUtilities; import org.unicode.cldr.util.CldrUtility; +import org.unicode.cldr.util.CollatorHelper; import org.unicode.cldr.util.Pair; /** @@ -49,7 +50,7 @@ public class MakeTransliterator { static NumberFormat nf = NumberFormat.getInstance(ULocale.ENGLISH); - static Collator col = Collator.getInstance(ULocale.ROOT); + static Collator col = CollatorHelper.ROOT_COLLATOR; static String cldrDataDir = "C:\\cvsdata\\unicode\\cldr\\tools\\java\\org\\unicode\\cldr\\util\\data\\transforms\\"; diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/Misc.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/Misc.java index 0d452f5da07..f8feea691c0 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/Misc.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/Misc.java @@ -39,6 +39,7 @@ import org.unicode.cldr.util.CLDRFile; import org.unicode.cldr.util.CLDRPaths; import org.unicode.cldr.util.CldrUtility; +import org.unicode.cldr.util.CollatorHelper; import org.unicode.cldr.util.Factory; import org.unicode.cldr.util.Iso3166Data; import org.unicode.cldr.util.LanguageTagParser; @@ -466,8 +467,7 @@ private static void printCurrentTimezoneLocalizations(Set languages) } static void printZoneAliases() { - RuleBasedCollator col = (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH); - col.setNumericCollation(true); + RuleBasedCollator col = CollatorHelper.ROOT_NUMERIC; StandardCodes sc = StandardCodes.make(); Map zone_countries = sc.getZoneToCounty(); Map old_new = sc.getZoneLinkold_new(); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowData.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowData.java index 5bcc66c7d54..af4f37e87a8 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowData.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ShowData.java @@ -10,12 +10,9 @@ import com.ibm.icu.dev.util.UOption; import com.ibm.icu.impl.Relation; import com.ibm.icu.lang.UScript; -import com.ibm.icu.text.Collator; -import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.text.Transliterator; import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.text.UnicodeSetIterator; -import com.ibm.icu.util.ULocale; import java.io.BufferedReader; import java.io.IOException; import java.io.PrintWriter; @@ -79,12 +76,6 @@ public static String dateFooter() { + System.lineSeparator(); } - static RuleBasedCollator uca = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); - - { - uca.setNumericCollation(true); - } - static PathHeader.Factory prettyPathMaker = PathHeader.getFactory(CLDRConfig.getInstance().getEnglish()); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/TablePrinter.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/TablePrinter.java index b86f5ccb049..2a24e4acf47 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/TablePrinter.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/TablePrinter.java @@ -11,6 +11,7 @@ import java.util.Collection; import java.util.Comparator; import java.util.List; +import org.unicode.cldr.util.CollatorHelper; public class TablePrinter { @@ -276,7 +277,7 @@ public String toTable() { static class ColumnSorter implements Comparator { private int[] sortPriorities = new int[0]; private BitSet ascending = new BitSet(); - Collator englishCollator = Collator.getInstance(ULocale.ENGLISH); + Collator englishCollator = CollatorHelper.ROOT_COLLATOR; @Override @SuppressWarnings("unchecked") diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRConfig.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRConfig.java index 846d8f86ade..e35af38a226 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRConfig.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRConfig.java @@ -8,7 +8,6 @@ import com.ibm.icu.dev.test.TestLog; import com.ibm.icu.text.Collator; import com.ibm.icu.text.RuleBasedCollator; -import com.ibm.icu.util.ULocale; import com.ibm.icu.util.VersionInfo; import java.io.File; import java.io.FilenameFilter; @@ -340,8 +339,7 @@ private static final RuleBasedCollator make() { try { colRoot = new RuleBasedCollator(rules); } catch (Exception e) { - colRoot = (RuleBasedCollator) getInstance().getCollator(); - return colRoot; + return CollatorHelper.EMOJI_COLLATOR; } colRoot.setStrength(Collator.IDENTICAL); colRoot.setNumericCollation(true); @@ -359,37 +357,6 @@ public final Comparator getComparatorRoot() { return (Comparator) (getCollatorRoot()); } - private static final class CollatorHelper { - static final Collator EMOJI_COLLATOR = makeEmojiCollator(); - - private static final Collator makeEmojiCollator() { - final RuleBasedCollator col = - (RuleBasedCollator) - Collator.getInstance(ULocale.forLanguageTag("en-u-co-emoji")); - col.setStrength(Collator.IDENTICAL); - col.setNumericCollation(true); - col.freeze(); - return col; - } - - static final Collator ROOT_NUMERIC = makeRootNumeric(); - - private static final Collator makeRootNumeric() { - RuleBasedCollator _ROOT_COL = (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH); - _ROOT_COL.setNumericCollation(true); - _ROOT_COL.freeze(); - return _ROOT_COL; - } - } - - public Collator getCollator() { - return CollatorHelper.EMOJI_COLLATOR; - } - - public Collator getRootNumeric() { - return CollatorHelper.ROOT_NUMERIC; - } - public synchronized Phase getPhase() { if (phase == null) { if (getEnvironment() == Environment.UNITTEST) { diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CollationMapMaker.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CollationMapMaker.java index cb1958a79c1..ae8ee557760 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CollationMapMaker.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CollationMapMaker.java @@ -20,7 +20,6 @@ import com.ibm.icu.text.UTF16; import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.text.UnicodeSetIterator; -import com.ibm.icu.util.ULocale; import java.util.ArrayList; import java.util.Collection; import java.util.Comparator; @@ -199,7 +198,7 @@ public UnicodeMap getUnicodeMap() { } static final boolean showDetails = false; - static final RuleBasedCollator uca = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); + static final RuleBasedCollator uca = CollatorHelper.ROOT_COLLATOR; static final UnicodeSet filteredChars = new UnicodeSet( "[{ss}[^[:Co:][:Cf:][:Cc:][:Cn:][:Cs:][:script=Han:][:script=Hangul:]-[:nfkcquickcheck=no:]]]") diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CollatorHelper.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CollatorHelper.java new file mode 100644 index 00000000000..8ad6b96db89 --- /dev/null +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CollatorHelper.java @@ -0,0 +1,67 @@ +package org.unicode.cldr.util; + +import com.ibm.icu.text.Collator; +import com.ibm.icu.text.RuleBasedCollator; +import com.ibm.icu.util.ULocale; + +public final class CollatorHelper { + public static final RuleBasedCollator EMOJI_COLLATOR = makeEmojiCollator(); + public static final RuleBasedCollator ROOT_COLLATOR = makeRootCollator(); + public static final RuleBasedCollator ROOT_IDENTICAL = makeRootIdentical(); + public static final RuleBasedCollator ROOT_NUMERIC = makeRootNumeric(); + public static final RuleBasedCollator ROOT_NUMERIC_IDENTICAL = makeRootNumericIdentical(); + public static final RuleBasedCollator ROOT_PRIMARY = makeRootPrimary(); + public static final RuleBasedCollator ROOT_PRIMARY_SHIFTED = makeRootPrimaryShifted(); + public static final RuleBasedCollator ROOT_SECONDARY = makeRootSecondary(); + + private static RuleBasedCollator makeEmojiCollator() { + ULocale uLocale = ULocale.forLanguageTag("en-u-co-emoji"); + RuleBasedCollator col = (RuleBasedCollator) RuleBasedCollator.getInstance(uLocale); + col.setStrength(RuleBasedCollator.IDENTICAL); + col.setNumericCollation(true); + return (RuleBasedCollator) col.freeze(); + } + + private static RuleBasedCollator makeRootCollator() { + RuleBasedCollator col = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); + return (RuleBasedCollator) col.freeze(); + } + + private static RuleBasedCollator makeRootIdentical() { + RuleBasedCollator col = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); + col.setStrength(Collator.IDENTICAL); + return (RuleBasedCollator) col.freeze(); + } + + private static RuleBasedCollator makeRootNumeric() { + RuleBasedCollator col = (RuleBasedCollator) RuleBasedCollator.getInstance(ULocale.ROOT); + col.setNumericCollation(true); + return (RuleBasedCollator) col.freeze(); + } + + private static RuleBasedCollator makeRootNumericIdentical() { + RuleBasedCollator col = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); + col.setStrength(Collator.IDENTICAL); + col.setNumericCollation(true); + return (RuleBasedCollator) col.freeze(); + } + + private static RuleBasedCollator makeRootPrimary() { + RuleBasedCollator col = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); + col.setStrength(Collator.PRIMARY); + return (RuleBasedCollator) col.freeze(); + } + + private static RuleBasedCollator makeRootPrimaryShifted() { + RuleBasedCollator col = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); + col.setStrength(Collator.PRIMARY); + col.setAlternateHandlingShifted(true); + return (RuleBasedCollator) col.freeze(); + } + + private static RuleBasedCollator makeRootSecondary() { + RuleBasedCollator col = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); + col.setStrength(Collator.SECONDARY); + return (RuleBasedCollator) col.freeze(); + } +} diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/DtdData.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/DtdData.java index 7e656dceb24..99ecaea8b9a 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/DtdData.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/DtdData.java @@ -1886,7 +1886,7 @@ private static final class UnitOrderHolder { new MapComparator().add("standard", "accounting").freeze(); static Comparator zoneOrder = StandardCodes.make().getTZIDComparator(); - static final Comparator COMP = (Comparator) CLDRConfig.getInstance().getCollator(); + static final Comparator COMP = (Comparator) CollatorHelper.EMOJI_COLLATOR; // Hack for US static final Comparator UNICODE_SET_COMPARATOR = diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/Emoji.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/Emoji.java index 8bf5d4bfc11..31154b2ce49 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/Emoji.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/Emoji.java @@ -33,7 +33,7 @@ import org.unicode.cldr.util.PathHeader.PageId; public class Emoji { - public static final Collator COLLATOR = CLDRConfig.getInstance().getCollator(); + public static final Collator COLLATOR = CollatorHelper.EMOJI_COLLATOR; public static final String EMOJI_VARIANT = "\uFE0F"; public static final char JOINER = '\u200D'; public static final String JOINER_STR = "\u200D"; diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/MapComparator.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/MapComparator.java index 5c72b96f1e9..6c8ce9d85d5 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/MapComparator.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/MapComparator.java @@ -8,11 +8,8 @@ */ package org.unicode.cldr.util; -import com.ibm.icu.text.Collator; -import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.util.Freezable; -import com.ibm.icu.util.ULocale; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; @@ -24,20 +21,6 @@ import java.util.function.Function; public class MapComparator implements Comparator, Freezable> { - private static final class CollatorHelper { - public static final Collator UCA = getUCA(); - - /** - * This does not change, so we can create one and freeze it. - * - * @return - */ - private static Collator getUCA() { - final RuleBasedCollator newUca = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); - newUca.setNumericCollation(true); - return newUca.freeze(); - } - } // initialize this once private Map ordering = new TreeMap<>(); // maps from name to rank @@ -200,7 +183,7 @@ public int compare(K a, K b) { if (a instanceof CharSequence) { if (b instanceof CharSequence) { - int result = CollatorHelper.UCA.compare(a.toString(), b.toString()); + int result = CollatorHelper.ROOT_NUMERIC.compare(a.toString(), b.toString()); if (result != 0) { return result; } diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/ReferenceStringSearch.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/ReferenceStringSearch.java index 173e3b7864f..8dd62a73cc7 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/ReferenceStringSearch.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/ReferenceStringSearch.java @@ -11,7 +11,6 @@ import com.ibm.icu.text.CollationElementIterator; import com.ibm.icu.text.Collator; import com.ibm.icu.text.RuleBasedCollator; -import com.ibm.icu.util.ULocale; import java.util.ArrayList; /** @@ -28,7 +27,7 @@ public class ReferenceStringSearch { private static final int PADDING = 3; - private RuleBasedCollator collator = (RuleBasedCollator) Collator.getInstance(ULocale.ROOT); + private RuleBasedCollator collator = CollatorHelper.ROOT_COLLATOR; private BreakIterator breaker; diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/SimpleUnicodeSetFormatter.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/SimpleUnicodeSetFormatter.java index 36702429d04..60751020448 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/SimpleUnicodeSetFormatter.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/SimpleUnicodeSetFormatter.java @@ -40,7 +40,7 @@ public class SimpleUnicodeSetFormatter implements FormatterParser { public static Normalizer2 nfc = Normalizer2.getNFCInstance(); public static final Comparator BASIC_COLLATOR = - (Comparator) CLDRConfig.getInstance().getCollator(); + (Comparator) CollatorHelper.EMOJI_COLLATOR; public static final int DEFAULT_RANGES_ABOVE = 1024; diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/TestUtilities.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/TestUtilities.java index 58ffaae5ad1..7ac9a99849d 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/TestUtilities.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/TestUtilities.java @@ -233,14 +233,11 @@ private static void checkNumericTimezone() throws IOException { if (maxNumeric < numeric) maxNumeric = numeric; } // get the differences (and sort them) - RuleBasedCollator eng = (RuleBasedCollator) Collator.getInstance(); - eng.setNumericCollation(true); - - Set extra = new TreeSet<>(eng); + Set extra = new TreeSet<>(CollatorHelper.ROOT_NUMERIC); extra.addAll(map_timezone_integer.keySet()); extra.removeAll(timezones); System.out.println("Extra: " + extra); - Set needed = new TreeSet<>(eng); + Set needed = new TreeSet<>(CollatorHelper.ROOT_NUMERIC); needed.addAll(timezones); needed.removeAll(map_timezone_integer.keySet()); System.out.println("Needed: " + needed); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnicodeSetPrettyPrinter.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnicodeSetPrettyPrinter.java index 6ad27753078..e623b2b9159 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnicodeSetPrettyPrinter.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnicodeSetPrettyPrinter.java @@ -15,7 +15,6 @@ import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.text.UnicodeSetIterator; import com.ibm.icu.util.ICUUncheckedIOException; -import com.ibm.icu.util.ULocale; import java.io.IOException; import java.text.FieldPosition; import java.util.Comparator; @@ -52,11 +51,8 @@ public class UnicodeSetPrettyPrinter implements FormatterParser { /** Make from root collator obtained from ICU */ public static final UnicodeSetPrettyPrinter ROOT_ICU = from( - (Comparator) Collator.getInstance(ULocale.ROOT).freeze(), - (Comparator) - Collator.getInstance(ULocale.ROOT) - .setStrength2(Collator.PRIMARY) - .freeze()); + (Comparator) CollatorHelper.ROOT_COLLATOR, + (Comparator) CollatorHelper.ROOT_PRIMARY); /** Make from ICU Locale */ public static UnicodeSetPrettyPrinter fromIcuLocale(String localeId) { diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/VerifyCompactNumbers.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/VerifyCompactNumbers.java index 70fd3fb4b69..70e76dd61aa 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/VerifyCompactNumbers.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/VerifyCompactNumbers.java @@ -100,7 +100,7 @@ public static void main(String[] args) throws IOException { DateTimeFormats.writeCss(DIR); final CLDRFile english = CLDR_CONFIG.getEnglish(); - Map indexMap = new TreeMap<>(CLDR_CONFIG.getCollator()); + Map indexMap = new TreeMap<>(CollatorHelper.EMOJI_COLLATOR); for (String locale : availableLanguages) { if (defaultContentLocales.contains(locale)) { diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/VerifyZones.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/VerifyZones.java index 940483f49d2..37985006984 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/VerifyZones.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/VerifyZones.java @@ -263,7 +263,7 @@ public static void main(String[] args) throws IOException { DateTimeFormats.writeCss(DIR); final CLDRFile english = CLDR_CONFIG.getEnglish(); - Map indexMap = new TreeMap<>(CLDR_CONFIG.getCollator()); + Map indexMap = new TreeMap<>(CollatorHelper.EMOJI_COLLATOR); for (String localeID : factory2.getAvailableLanguages()) { Level level = StandardCodes.make().getLocaleCoverageLevel(organization, localeID); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/VettingViewer.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/VettingViewer.java index 59213f11100..7d3160a0d2b 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/VettingViewer.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/VettingViewer.java @@ -1083,7 +1083,7 @@ private void writeSummaryTable( } private Map getSortedNames(Organization org, Level desiredLevel) { - Map sortedNames = new TreeMap<>(CLDRConfig.getInstance().getCollator()); + Map sortedNames = new TreeMap<>(CollatorHelper.EMOJI_COLLATOR); // A user in the Unaffiliated organization can access a list with all non-TC locales. if (org == Organization.unaffiliated && desiredLevel == Level.BASIC) { diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/VoteResolver.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/VoteResolver.java index e6eab29b6a4..3a89bfb02a2 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/VoteResolver.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/VoteResolver.java @@ -4,7 +4,6 @@ import com.google.common.collect.ImmutableSet; import com.ibm.icu.text.Collator; import com.ibm.icu.util.Output; -import com.ibm.icu.util.ULocale; import java.sql.Timestamp; import java.util.*; import java.util.Map.Entry; @@ -926,7 +925,7 @@ public Map getOrgToVotes(Organization org) { private CLDRLocale locale; private PathHeader pathHeader; - private static final Collator englishCollator = Collator.getInstance(ULocale.ENGLISH).freeze(); + private static final Collator englishCollator = CollatorHelper.ROOT_COLLATOR; /** Used for comparing objects of type T */ private final Comparator objectCollator = @@ -1335,7 +1334,7 @@ private HashMap makeVoteCountMap(Set sortedValues) { * be symmetrical in its handling of hard and soft votes. * *

Note: now that "↑↑↑" is permitted to participate directly in voting resolution, it becomes - * significant that with Collator.getInstance(ULocale.ENGLISH), "↑↑↑" sorts before "AAA" just as + * significant that with Collator.getInstance(ULocale.ROOT), "↑↑↑" sorts before "AAA" just as * "AAA" sorts before "BBB". * * @param sortedValues the set of sorted values, possibly to be modified diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCollationStringByteConverter.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCollationStringByteConverter.java index 67ccfb2137d..b29999f7b8e 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCollationStringByteConverter.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestCollationStringByteConverter.java @@ -8,7 +8,6 @@ package org.unicode.cldr.unittest; import com.ibm.icu.impl.Utility; -import com.ibm.icu.text.Collator; import com.ibm.icu.text.DateFormat; import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.text.SimpleDateFormat; @@ -21,6 +20,7 @@ import java.util.Map; import java.util.TreeMap; import org.unicode.cldr.util.CollationStringByteConverter; +import org.unicode.cldr.util.CollatorHelper; import org.unicode.cldr.util.Dictionary; import org.unicode.cldr.util.Dictionary.DictionaryBuilder; import org.unicode.cldr.util.Dictionary.DictionaryCharList; @@ -335,9 +335,7 @@ private static String show(String test, ParsePosition parsePosition) { } public static void check() throws Exception { - final RuleBasedCollator col = (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH); - col.setStrength(Collator.PRIMARY); - col.setAlternateHandlingShifted(true); + final RuleBasedCollator col = CollatorHelper.ROOT_PRIMARY_SHIFTED; CollationStringByteConverter converter = new CollationStringByteConverter(col, new Utf8StringByteConverter()); // new // ByteString(true) diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestHelper.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestHelper.java index d3bf6594912..99061ead941 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestHelper.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestHelper.java @@ -7,9 +7,7 @@ import com.ibm.icu.impl.Utility; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UProperty; -import com.ibm.icu.text.Collator; import com.ibm.icu.text.UnicodeSet; -import com.ibm.icu.util.ULocale; import java.io.StringWriter; import java.text.NumberFormat; import java.util.ArrayList; @@ -211,11 +209,9 @@ public void TestCounter() { Counter counter = new Counter<>(true); Comparator uca = new Comparator<>() { - Collator col = Collator.getInstance(ULocale.ENGLISH); - @Override public int compare(String o1, String o2) { - return col.compare(o1, o2); + return CollatorHelper.ROOT_COLLATOR.compare(o1, o2); } }; InverseComparator ucaDown = new InverseComparator(uca); diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestReferenceStringSearch.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestReferenceStringSearch.java index bac085b231b..cddbac71941 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestReferenceStringSearch.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestReferenceStringSearch.java @@ -2,17 +2,16 @@ import com.ibm.icu.impl.Utility; import com.ibm.icu.text.BreakIterator; -import com.ibm.icu.text.Collator; import com.ibm.icu.text.NumberFormat; import com.ibm.icu.text.RuleBasedCollator; import com.ibm.icu.text.SearchIterator; import com.ibm.icu.text.StringCharacterIterator; import com.ibm.icu.text.StringSearch; -import com.ibm.icu.util.ULocale; import java.text.CharacterIterator; import java.util.Map; import java.util.TreeMap; import org.unicode.cldr.util.CollationMapMaker; +import org.unicode.cldr.util.CollatorHelper; import org.unicode.cldr.util.Dictionary; import org.unicode.cldr.util.Dictionary.DictionaryCharList; import org.unicode.cldr.util.ReferenceStringSearch; @@ -37,12 +36,7 @@ public static final void main(String[] args) { } static final RuleBasedCollator TEST_COLLATOR = - (RuleBasedCollator) Collator.getInstance(ULocale.ENGLISH); - - static { - TEST_COLLATOR.setStrength(Collator.PRIMARY); - TEST_COLLATOR.setAlternateHandlingShifted(true); // ignore puncuation - } + CollatorHelper.ROOT_PRIMARY_SHIFTED; // ignore puncuation static final BreakIterator TEST_BREAKER = BreakIterator.getCharacterInstance();