From a814c25384e3767f82a67b4c54c812b4a82e3acd Mon Sep 17 00:00:00 2001 From: "Steven R. Loomis" Date: Tue, 23 Jan 2024 08:39:13 -0600 Subject: [PATCH] CLDR-17313 intern xpaths: update per code review - don't intern in StringId - simplify intern logic with lambdas --- .../java/org/unicode/cldr/util/CLDRFile.java | 90 ++++++++++--------- .../org/unicode/cldr/util/CharUtilities.java | 32 ++----- .../java/org/unicode/cldr/util/StringId.java | 2 +- 3 files changed, 55 insertions(+), 69 deletions(-) diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java index b1a0fd99075..ace78fa059d 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java @@ -55,6 +55,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; import org.unicode.cldr.test.CheckMetazones; import org.unicode.cldr.util.DayPeriodInfo.DayPeriod; import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature; @@ -3156,8 +3157,8 @@ public Set getRawExtraPaths() { if (extraPaths == null) { extraPaths = ImmutableSet.builder() - .addAll(CharUtilities.internAll(getRawExtraPathsPrivate())) - .addAll(CharUtilities.internAll(CONST_EXTRA_PATHS)) + .addAll(getRawExtraPathsPrivate()) + .addAll(CONST_EXTRA_PATHS) .build(); if (DEBUG) { System.out.println(getLocaleID() + "\textras: " + extraPaths.size()); @@ -3186,7 +3187,7 @@ public Set getRawExtraPaths() { * client code. Make sure that updates here are reflected there and vice versa. *

Reference: https://unicode-org.atlassian.net/browse/CLDR-11238 */ - private Set getRawExtraPathsPrivate() { + private List getRawExtraPathsPrivate() { Set toAddTo = new HashSet<>(); SupplementalDataInfo supplementalData = CLDRConfig.getInstance().getSupplementalDataInfo(); // units @@ -3377,7 +3378,7 @@ private Set getRawExtraPathsPrivate() { } } } - return toAddTo; + return toAddTo.stream().map(String::intern).collect(Collectors.toList()); } private void addPluralCounts( @@ -4307,44 +4308,45 @@ private String getStringValueWithBaileyNotConstructed(String path) { * TestPaths.extraPathAllowsNullValue */ static final Set CONST_EXTRA_PATHS = - ImmutableSet.of( - // Individual zone overrides — were in getRawExtraPaths - "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/generic", - "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/standard", - "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/daylight", - "//ldml/dates/timeZoneNames/zone[@type=\"Europe/Dublin\"]/long/daylight", - "//ldml/dates/timeZoneNames/zone[@type=\"Europe/London\"]/long/daylight", - "//ldml/dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/long/standard", - "//ldml/dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/short/standard", - // Person name paths - "//ldml/personNames/sampleName[@item=\"nativeG\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"nativeGS\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"nativeGS\"]/nameField[@type=\"surname\"]", - "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given2\"]", - "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"surname\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"title\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given\"]", - 
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given-informal\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given2\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname-prefix\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname-core\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname2\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"generation\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"credentials\"]", - "//ldml/personNames/sampleName[@item=\"foreignG\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"foreignGS\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"foreignGS\"]/nameField[@type=\"surname\"]", - "//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"given2\"]", - "//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"surname\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"title\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given-informal\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given2\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-prefix\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-core\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname2\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"generation\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"credentials\"]"); + CharUtilities.internImmutableSet( + Set.of( + // Individual 
zone overrides — were in getRawExtraPaths + "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/generic", + "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/standard", + "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/daylight", + "//ldml/dates/timeZoneNames/zone[@type=\"Europe/Dublin\"]/long/daylight", + "//ldml/dates/timeZoneNames/zone[@type=\"Europe/London\"]/long/daylight", + "//ldml/dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/long/standard", + "//ldml/dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/short/standard", + // Person name paths + "//ldml/personNames/sampleName[@item=\"nativeG\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"nativeGS\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"nativeGS\"]/nameField[@type=\"surname\"]", + "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given2\"]", + "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"surname\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"title\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given-informal\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given2\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname-prefix\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname-core\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname2\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"generation\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"credentials\"]", + "//ldml/personNames/sampleName[@item=\"foreignG\"]/nameField[@type=\"given\"]", + 
"//ldml/personNames/sampleName[@item=\"foreignGS\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"foreignGS\"]/nameField[@type=\"surname\"]", + "//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"given2\"]", + "//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"surname\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"title\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given-informal\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given2\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-prefix\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-core\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname2\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"generation\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"credentials\"]")); } diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java index 6bfa75f8968..90effb5c73e 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java @@ -1,31 +1,10 @@ package org.unicode.cldr.util; -import java.util.Iterator; +import java.util.Collection; +import java.util.Set; +import java.util.stream.Collectors; public class CharUtilities { - /** intern everything in the src iterable */ - public static Iterable internAll(final Iterable src) { - return new Iterable() { - @Override - public Iterator iterator() { - return CharUtilities.internAll(src.iterator()); - 
} - }; - } - /** intern everything in the src iterator */ - protected static Iterator internAll(final Iterator iterator) { - return new Iterator() { - @Override - public boolean hasNext() { - return iterator.hasNext(); - } - - @Override - public String next() { - return iterator.next().intern(); - } - }; - } /** * Simple wrapper for CharSequence * @@ -168,4 +147,9 @@ public static int compare(CharSequence text1, CharSequence text2) { } } } + + /** intern each element in the collection and return a new unmodifiable Set */ + public static Set internImmutableSet(Collection s) { + return s.stream().map(String::intern).collect(Collectors.toUnmodifiableSet()); + } } diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/StringId.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/StringId.java index c6caa6d01eb..4db909f9b6d 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/StringId.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/StringId.java @@ -34,7 +34,7 @@ public final class StringId { * @return a value from 0 to 0x7FFFFFFFFFFFFFFFL. */ public static long getId(CharSequence charSequence) { - String string = charSequence.toString().intern(); + String string = charSequence.toString(); Long resultLong = STRING_TO_ID.get(string); if (resultLong != null) { return resultLong;