From 145b379199de3d0c9b62f84d83f53172e8629309 Mon Sep 17 00:00:00 2001 From: "Steven R. Loomis" Date: Tue, 23 Jan 2024 06:36:29 -0600 Subject: [PATCH] CLDR-17314 String.intern() across multiple locations - Unit test to verify that CLDRFile paths are interned - interning in XPathTable and StringId --- .../java/org/unicode/cldr/web/XPathTable.java | 3 +++ .../java/org/unicode/cldr/util/CLDRFile.java | 4 ++-- .../org/unicode/cldr/util/CharUtilities.java | 24 +++++++++++++++++++ .../unicode/cldr/util/SimpleXMLSource.java | 3 ++- .../java/org/unicode/cldr/util/StringId.java | 2 +- .../java/org/unicode/cldr/util/XMLSource.java | 19 +++++++++------ .../org/unicode/cldr/util/TestCLDRFile.java | 20 ++++++++++++++++ 7 files changed, 64 insertions(+), 11 deletions(-) diff --git a/tools/cldr-apps/src/main/java/org/unicode/cldr/web/XPathTable.java b/tools/cldr-apps/src/main/java/org/unicode/cldr/web/XPathTable.java index 257d97d1700..f598cea45d6 100644 --- a/tools/cldr-apps/src/main/java/org/unicode/cldr/web/XPathTable.java +++ b/tools/cldr-apps/src/main/java/org/unicode/cldr/web/XPathTable.java @@ -269,6 +269,7 @@ private synchronized void addXpaths(Set xpaths, Connection conn) throws * @return the xpath's id (as an Integer) */ private synchronized Integer addXpath(String xpath, boolean addIfNotFound, Connection inConn) { + xpath = xpath.intern(); Integer nid = stringToId.get(xpath); // double check if (nid != null) { return nid; @@ -372,6 +373,7 @@ public final String getById(int id) { * //ldml/dates/timeZoneNames/zone[@type="America/Guadeloupe"]/short/daylight */ public final void setById(int id, String xpath) { + xpath = xpath.intern(); stringToId.put(idToString_put(id, xpath), id); sidToString.put(getStringID(xpath), xpath); } @@ -383,6 +385,7 @@ public final void setById(int id, String xpath) { * @return the id, like 692804, for the specified path */ public final int getByXpath(String xpath) { + xpath = xpath.intern(); Integer nid = stringToId.get(xpath); if (nid != null) { return nid.intValue(); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java index e63887428a4..b1a0fd99075 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java @@ -3156,8 +3156,8 @@ public Set getRawExtraPaths() { if (extraPaths == null) { extraPaths = ImmutableSet.builder() - .addAll(getRawExtraPathsPrivate()) - .addAll(CONST_EXTRA_PATHS) + .addAll(CharUtilities.internAll(getRawExtraPathsPrivate())) + .addAll(CharUtilities.internAll(CONST_EXTRA_PATHS)) .build(); if (DEBUG) { System.out.println(getLocaleID() + "\textras: " + extraPaths.size()); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java index fb763f97bbd..6bfa75f8968 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java @@ -1,7 +1,31 @@ package org.unicode.cldr.util; +import java.util.Iterator; + public class CharUtilities { + /** intern everything in the src iterable */ + public static Iterable internAll(final Iterable src) { + return new Iterable() { + @Override + public Iterator iterator() { + return CharUtilities.internAll(src.iterator()); + } + }; + } + /** intern everything in the src iterator */ + protected static Iterator internAll(final Iterator iterator) { + return new Iterator() { + @Override + public boolean hasNext() { + return iterator.hasNext(); + } + @Override + public String next() { + return iterator.next().intern(); + } + }; + } /** * Simple wrapper for CharSequence * diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/SimpleXMLSource.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/SimpleXMLSource.java index b08c2a8229a..e97ba9d9484 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/SimpleXMLSource.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/SimpleXMLSource.java @@ -108,11 +108,12 @@ public XMLSource cloneAsThawed() { @Override public void putFullPathAtDPath(String distinguishingXPath, String fullxpath) { - xpath_fullXPath.put(distinguishingXPath, fullxpath); + xpath_fullXPath.put(distinguishingXPath.intern(), fullxpath.intern()); } @Override public void putValueAtDPath(String distinguishingXPath, String value) { + distinguishingXPath = distinguishingXPath.intern(); String oldValue = xpath_value.get(distinguishingXPath); xpath_value.put(distinguishingXPath, value); updateValuePathMapping(distinguishingXPath, oldValue, value); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/StringId.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/StringId.java index 4db909f9b6d..c6caa6d01eb 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/StringId.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/StringId.java @@ -34,7 +34,7 @@ public final class StringId { * @return a value from 0 to 0x7FFFFFFFFFFFFFFFL. */ public static long getId(CharSequence charSequence) { - String string = charSequence.toString(); + String string = charSequence.toString().intern(); Long resultLong = STRING_TO_ID.get(string); if (resultLong != null) { return resultLong; diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/XMLSource.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/XMLSource.java index 4df1bbf5959..6fae02e1d48 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/XMLSource.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/XMLSource.java @@ -261,6 +261,7 @@ public boolean isFrozen() { * @param value */ public String putValueAtPath(String xpath, String value) { + xpath = xpath.intern(); if (locked) { throw new UnsupportedOperationException("Attempt to modify locked object"); } @@ -493,7 +494,8 @@ private TreeMap loadAliases() { if (!Alias.isAliasPath(path)) { continue; } - String fullPath = getFullPathAtDPath(path); + path = path.intern(); + String fullPath = getFullPathAtDPath(path).intern(); Alias temp = Alias.make(fullPath); if (temp == null) { continue; @@ -892,6 +894,7 @@ public Date getChangeDateAtDPath(String xpath) { private String getFullPath( String xpath, AliasLocation fullStatus, String fullPathWhereFound) { String result = null; + xpath = xpath.intern(); if (this.cachingIsEnabled) { result = getFullPathAtDPathCache.get(xpath); } @@ -1078,6 +1081,7 @@ private AliasLocation getPathLocation( boolean skipFirst, boolean skipInheritanceMarker, List list) { + xpath = xpath.intern(); // When calculating the Bailey values, we track the final // return value as firstValue. If non-null, this will become @@ -1145,6 +1149,7 @@ private AliasLocation getPathLocation( aliasedPath = aliases.get(possibleSubpath) + xpath.substring(possibleSubpath.length()); + aliasedPath = aliasedPath.intern(); if (list != null) { // It's an explicit alias, just at a parent element (subset xpath) list.add( @@ -1165,7 +1170,7 @@ private AliasLocation getPathLocation( // alts are special; they act like there is a root alias to the path without the alt. if (aliasedPath == null && xpath.contains("[@alt=")) { - aliasedPath = XPathParts.getPathWithoutAlt(xpath); + aliasedPath = XPathParts.getPathWithoutAlt(xpath).intern(); if (list != null) { list.add( new LocaleInheritanceInfo( @@ -1178,10 +1183,10 @@ private AliasLocation getPathLocation( // //ldml/numbers/currencies/currency[@type="BRZ"]/displayName[@count="other"] => // //ldml/numbers/currencies/currency[@type="BRZ"]/displayName if (aliasedPath == null && xpath.contains("[@count=")) { - aliasedPath = COUNT_EQUALS.matcher(xpath).replaceAll("[@count=\"other\"]"); + aliasedPath = COUNT_EQUALS.matcher(xpath).replaceAll("[@count=\"other\"]").intern(); if (aliasedPath.equals(xpath)) { if (xpath.contains("/displayName")) { - aliasedPath = COUNT_EQUALS.matcher(xpath).replaceAll(""); + aliasedPath = COUNT_EQUALS.matcher(xpath).replaceAll("").intern(); if (aliasedPath.equals(xpath)) { throw new RuntimeException("Internal error"); } @@ -1290,7 +1295,7 @@ private Set findNonAliasedPaths() { // others for (XMLSource curSource : sourceList) { for (String xpath : curSource) { - paths.add(xpath); + paths.add(xpath.intern()); } } return paths; @@ -1328,7 +1333,7 @@ private Set getDirectAliases(String[] paths) { String suffix = xpath.substring(suffixStart); for (String reverseAlias : list) { String reversePath = reverseAlias + suffix; - newPaths.add(reversePath); + newPaths.add(reversePath.intern()); } endIndex++; } @@ -1967,7 +1972,7 @@ public XMLSource add(String currentFullXPath, String value) { LOG_PROGRESS, "ADDING: \t" + currentFullXPath + " \t" + value + "\t" + currentFullXPath); try { - this.putValueAtPath(currentFullXPath, value); + this.putValueAtPath(currentFullXPath.intern(), value); } catch (RuntimeException e) { throw new IllegalArgumentException( "failed adding " + currentFullXPath + ",\t" + value, e); diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/util/TestCLDRFile.java b/tools/cldr-code/src/test/java/org/unicode/cldr/util/TestCLDRFile.java index 4fe5082182e..d30a5bcbd18 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/util/TestCLDRFile.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/util/TestCLDRFile.java @@ -352,4 +352,24 @@ public void testGetPaths() { .getPathsWhereFound(GERMAN_IN_SWITZERLAND)); } } + + @Test + public void TestInternedPaths() { + { + final CLDRFile en = CLDRConfig.getInstance().getCLDRFile("en", false); + for (final String s : en.fullIterable()) { + final String s0 = new String(s); + assertTrue(s != s0); + assertTrue(s == s0.intern(), () -> "in unresolved en was not interned: " + s); + } + } + { + final CLDRFile en = CLDRConfig.getInstance().getCLDRFile("en", true); + for (final String s : en.fullIterable()) { + final String s0 = new String(s); + assertTrue(s != s0); + assertTrue(s == s0.intern(), () -> "in resolved en was not interned: " + s); + } + } + } }