diff --git a/tools/cldr-apps/src/main/java/org/unicode/cldr/web/XPathTable.java b/tools/cldr-apps/src/main/java/org/unicode/cldr/web/XPathTable.java index 257d97d1700..f598cea45d6 100644 --- a/tools/cldr-apps/src/main/java/org/unicode/cldr/web/XPathTable.java +++ b/tools/cldr-apps/src/main/java/org/unicode/cldr/web/XPathTable.java @@ -269,6 +269,7 @@ private synchronized void addXpaths(Set xpaths, Connection conn) throws * @return the xpath's id (as an Integer) */ private synchronized Integer addXpath(String xpath, boolean addIfNotFound, Connection inConn) { + xpath = xpath.intern(); Integer nid = stringToId.get(xpath); // double check if (nid != null) { return nid; @@ -372,6 +373,7 @@ public final String getById(int id) { * //ldml/dates/timeZoneNames/zone[@type="America/Guadeloupe"]/short/daylight */ public final void setById(int id, String xpath) { + xpath = xpath.intern(); stringToId.put(idToString_put(id, xpath), id); sidToString.put(getStringID(xpath), xpath); } @@ -383,6 +385,7 @@ public final void setById(int id, String xpath) { * @return the id, like 692804, for the specified path */ public final int getByXpath(String xpath) { + xpath = xpath.intern(); Integer nid = stringToId.get(xpath); if (nid != null) { return nid.intValue(); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java index e63887428a4..ace78fa059d 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java @@ -55,6 +55,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; import org.unicode.cldr.test.CheckMetazones; import org.unicode.cldr.util.DayPeriodInfo.DayPeriod; import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature; @@ -3186,7 +3187,7 @@ public Set getRawExtraPaths() { * client code. Make sure that updates here are reflected there and vice versa. *

Reference: https://unicode-org.atlassian.net/browse/CLDR-11238 */ - private Set getRawExtraPathsPrivate() { + private List getRawExtraPathsPrivate() { Set toAddTo = new HashSet<>(); SupplementalDataInfo supplementalData = CLDRConfig.getInstance().getSupplementalDataInfo(); // units @@ -3377,7 +3378,7 @@ private Set getRawExtraPathsPrivate() { } } } - return toAddTo; + return toAddTo.stream().map(String::intern).collect(Collectors.toList()); } private void addPluralCounts( @@ -4307,44 +4308,45 @@ private String getStringValueWithBaileyNotConstructed(String path) { * TestPaths.extraPathAllowsNullValue */ static final Set CONST_EXTRA_PATHS = - ImmutableSet.of( - // Individual zone overrides — were in getRawExtraPaths - "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/generic", - "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/standard", - "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/daylight", - "//ldml/dates/timeZoneNames/zone[@type=\"Europe/Dublin\"]/long/daylight", - "//ldml/dates/timeZoneNames/zone[@type=\"Europe/London\"]/long/daylight", - "//ldml/dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/long/standard", - "//ldml/dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/short/standard", - // Person name paths - "//ldml/personNames/sampleName[@item=\"nativeG\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"nativeGS\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"nativeGS\"]/nameField[@type=\"surname\"]", - "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given2\"]", - "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"surname\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"title\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given-informal\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given2\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname-prefix\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname-core\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname2\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"generation\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"credentials\"]", - "//ldml/personNames/sampleName[@item=\"foreignG\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"foreignGS\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"foreignGS\"]/nameField[@type=\"surname\"]", - "//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"given2\"]", - "//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"surname\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"title\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given-informal\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given2\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-prefix\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-core\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname2\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"generation\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"credentials\"]"); + CharUtilities.internImmutableSet( + Set.of( + // Individual zone overrides — were in getRawExtraPaths + "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/generic", + "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/standard", + "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/daylight", + "//ldml/dates/timeZoneNames/zone[@type=\"Europe/Dublin\"]/long/daylight", + "//ldml/dates/timeZoneNames/zone[@type=\"Europe/London\"]/long/daylight", + "//ldml/dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/long/standard", + "//ldml/dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/short/standard", + // Person name paths + "//ldml/personNames/sampleName[@item=\"nativeG\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"nativeGS\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"nativeGS\"]/nameField[@type=\"surname\"]", + "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given2\"]", + "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"surname\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"title\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given-informal\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given2\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname-prefix\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname-core\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname2\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"generation\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"credentials\"]", + "//ldml/personNames/sampleName[@item=\"foreignG\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"foreignGS\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"foreignGS\"]/nameField[@type=\"surname\"]", + "//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"given2\"]", + "//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"surname\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"title\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given-informal\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given2\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-prefix\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-core\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname2\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"generation\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"credentials\"]")); } diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java index fb763f97bbd..90effb5c73e 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java @@ -1,7 +1,10 @@ package org.unicode.cldr.util; -public class CharUtilities { +import java.util.Collection; +import java.util.Set; +import java.util.stream.Collectors; +public class CharUtilities { /** * Simple wrapper for CharSequence * @@ -144,4 +147,9 @@ public static int compare(CharSequence text1, CharSequence text2) { } } } + + /** intern each element in the string and return a new unmodifiable Set */ + public static Set internImmutableSet(Collection s) { + return s.stream().map(String::intern).collect(Collectors.toUnmodifiableSet()); + } } diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/SimpleXMLSource.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/SimpleXMLSource.java index b08c2a8229a..e97ba9d9484 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/SimpleXMLSource.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/SimpleXMLSource.java @@ -108,11 +108,12 @@ public XMLSource cloneAsThawed() { @Override public void putFullPathAtDPath(String distinguishingXPath, String fullxpath) { - xpath_fullXPath.put(distinguishingXPath, fullxpath); + xpath_fullXPath.put(distinguishingXPath.intern(), fullxpath.intern()); } @Override public void putValueAtDPath(String distinguishingXPath, String value) { + distinguishingXPath = distinguishingXPath.intern(); String oldValue = xpath_value.get(distinguishingXPath); xpath_value.put(distinguishingXPath, value); updateValuePathMapping(distinguishingXPath, oldValue, value); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/XMLSource.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/XMLSource.java index 4df1bbf5959..6fae02e1d48 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/XMLSource.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/XMLSource.java @@ -261,6 +261,7 @@ public boolean isFrozen() { * @param value */ public String putValueAtPath(String xpath, String value) { + xpath = xpath.intern(); if (locked) { throw new UnsupportedOperationException("Attempt to modify locked object"); } @@ -493,7 +494,8 @@ private TreeMap loadAliases() { if (!Alias.isAliasPath(path)) { continue; } - String fullPath = getFullPathAtDPath(path); + path = path.intern(); + String fullPath = getFullPathAtDPath(path).intern(); Alias temp = Alias.make(fullPath); if (temp == null) { continue; @@ -892,6 +894,7 @@ public Date getChangeDateAtDPath(String xpath) { private String getFullPath( String xpath, AliasLocation fullStatus, String fullPathWhereFound) { String result = null; + xpath = xpath.intern(); if (this.cachingIsEnabled) { result = getFullPathAtDPathCache.get(xpath); } @@ -1078,6 +1081,7 @@ private AliasLocation getPathLocation( boolean skipFirst, boolean skipInheritanceMarker, List list) { + xpath = xpath.intern(); // When calculating the Bailey values, we track the final // return value as firstValue. If non-null, this will become @@ -1145,6 +1149,7 @@ private AliasLocation getPathLocation( aliasedPath = aliases.get(possibleSubpath) + xpath.substring(possibleSubpath.length()); + aliasedPath = aliasedPath.intern(); if (list != null) { // It's an explicit alias, just at a parent element (subset xpath) list.add( @@ -1165,7 +1170,7 @@ private AliasLocation getPathLocation( // alts are special; they act like there is a root alias to the path without the alt. if (aliasedPath == null && xpath.contains("[@alt=")) { - aliasedPath = XPathParts.getPathWithoutAlt(xpath); + aliasedPath = XPathParts.getPathWithoutAlt(xpath).intern(); if (list != null) { list.add( new LocaleInheritanceInfo( @@ -1178,10 +1183,10 @@ private AliasLocation getPathLocation( // //ldml/numbers/currencies/currency[@type="BRZ"]/displayName[@count="other"] => // //ldml/numbers/currencies/currency[@type="BRZ"]/displayName if (aliasedPath == null && xpath.contains("[@count=")) { - aliasedPath = COUNT_EQUALS.matcher(xpath).replaceAll("[@count=\"other\"]"); + aliasedPath = COUNT_EQUALS.matcher(xpath).replaceAll("[@count=\"other\"]").intern(); if (aliasedPath.equals(xpath)) { if (xpath.contains("/displayName")) { - aliasedPath = COUNT_EQUALS.matcher(xpath).replaceAll(""); + aliasedPath = COUNT_EQUALS.matcher(xpath).replaceAll("").intern(); if (aliasedPath.equals(xpath)) { throw new RuntimeException("Internal error"); } @@ -1290,7 +1295,7 @@ private Set findNonAliasedPaths() { // others for (XMLSource curSource : sourceList) { for (String xpath : curSource) { - paths.add(xpath); + paths.add(xpath.intern()); } } return paths; @@ -1328,7 +1333,7 @@ private Set getDirectAliases(String[] paths) { String suffix = xpath.substring(suffixStart); for (String reverseAlias : list) { String reversePath = reverseAlias + suffix; - newPaths.add(reversePath); + newPaths.add(reversePath.intern()); } endIndex++; } @@ -1967,7 +1972,7 @@ public XMLSource add(String currentFullXPath, String value) { LOG_PROGRESS, "ADDING: \t" + currentFullXPath + " \t" + value + "\t" + currentFullXPath); try { - this.putValueAtPath(currentFullXPath, value); + this.putValueAtPath(currentFullXPath.intern(), value); } catch (RuntimeException e) { throw new IllegalArgumentException( "failed adding " + currentFullXPath + ",\t" + value, e); diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/util/TestCLDRFile.java b/tools/cldr-code/src/test/java/org/unicode/cldr/util/TestCLDRFile.java index 4fe5082182e..d30a5bcbd18 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/util/TestCLDRFile.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/util/TestCLDRFile.java @@ -352,4 +352,24 @@ public void testGetPaths() { .getPathsWhereFound(GERMAN_IN_SWITZERLAND)); } } + + @Test + public void TestInternedPaths() { + { + final CLDRFile en = CLDRConfig.getInstance().getCLDRFile("en", false); + for (final String s : en.fullIterable()) { + final String s0 = new String(s); + assertTrue(s != s0); + assertTrue(s == s0.intern(), () -> "in unresolved en was not interned: " + s); + } + } + { + final CLDRFile en = CLDRConfig.getInstance().getCLDRFile("en", true); + for (final String s : en.fullIterable()) { + final String s0 = new String(s); + assertTrue(s != s0); + assertTrue(s == s0.intern(), () -> "in resolved en was not interned: " + s); + } + } + } }