From ff421e78b77335700daa7c2816c0571c89229b25 Mon Sep 17 00:00:00 2001 From: "Steven R. Loomis" Date: Tue, 23 Jan 2024 06:36:29 -0600 Subject: [PATCH 1/2] CLDR-17314 String.intern() across multiple locations - Unit test to verify that CLDRFile paths are interned - interning in XPathTable and StringId --- .../java/org/unicode/cldr/web/XPathTable.java | 3 +++ .../java/org/unicode/cldr/util/CLDRFile.java | 4 ++-- .../org/unicode/cldr/util/CharUtilities.java | 24 +++++++++++++++++++ .../unicode/cldr/util/SimpleXMLSource.java | 3 ++- .../java/org/unicode/cldr/util/StringId.java | 2 +- .../java/org/unicode/cldr/util/XMLSource.java | 19 +++++++++------ .../org/unicode/cldr/util/TestCLDRFile.java | 20 ++++++++++++++++ 7 files changed, 64 insertions(+), 11 deletions(-) diff --git a/tools/cldr-apps/src/main/java/org/unicode/cldr/web/XPathTable.java b/tools/cldr-apps/src/main/java/org/unicode/cldr/web/XPathTable.java index 257d97d1700..f598cea45d6 100644 --- a/tools/cldr-apps/src/main/java/org/unicode/cldr/web/XPathTable.java +++ b/tools/cldr-apps/src/main/java/org/unicode/cldr/web/XPathTable.java @@ -269,6 +269,7 @@ private synchronized void addXpaths(Set xpaths, Connection conn) throws * @return the xpath's id (as an Integer) */ private synchronized Integer addXpath(String xpath, boolean addIfNotFound, Connection inConn) { + xpath = xpath.intern(); Integer nid = stringToId.get(xpath); // double check if (nid != null) { return nid; @@ -372,6 +373,7 @@ public final String getById(int id) { * //ldml/dates/timeZoneNames/zone[@type="America/Guadeloupe"]/short/daylight */ public final void setById(int id, String xpath) { + xpath = xpath.intern(); stringToId.put(idToString_put(id, xpath), id); sidToString.put(getStringID(xpath), xpath); } @@ -383,6 +385,7 @@ public final void setById(int id, String xpath) { * @return the id, like 692804, for the specified path */ public final int getByXpath(String xpath) { + xpath = xpath.intern(); Integer nid = stringToId.get(xpath); if (nid != null) { return nid.intValue(); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java index e63887428a4..b1a0fd99075 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java @@ -3156,8 +3156,8 @@ public Set getRawExtraPaths() { if (extraPaths == null) { extraPaths = ImmutableSet.builder() - .addAll(getRawExtraPathsPrivate()) - .addAll(CONST_EXTRA_PATHS) + .addAll(CharUtilities.internAll(getRawExtraPathsPrivate())) + .addAll(CharUtilities.internAll(CONST_EXTRA_PATHS)) .build(); if (DEBUG) { System.out.println(getLocaleID() + "\textras: " + extraPaths.size()); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java index fb763f97bbd..6bfa75f8968 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java @@ -1,7 +1,31 @@ package org.unicode.cldr.util; +import java.util.Iterator; + public class CharUtilities { + /** intern everything in the src iterable */ + public static Iterable internAll(final Iterable src) { + return new Iterable() { + @Override + public Iterator iterator() { + return CharUtilities.internAll(src.iterator()); + } + }; + } + /** intern everything in the src iterator */ + protected static Iterator internAll(final Iterator iterator) { + return new Iterator() { + @Override + public boolean hasNext() { + return iterator.hasNext(); + } + @Override + public String next() { + return iterator.next().intern(); + } + }; + } /** * Simple wrapper for CharSequence * diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/SimpleXMLSource.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/SimpleXMLSource.java index b08c2a8229a..e97ba9d9484 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/SimpleXMLSource.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/SimpleXMLSource.java @@ -108,11 +108,12 @@ public XMLSource cloneAsThawed() { @Override public void putFullPathAtDPath(String distinguishingXPath, String fullxpath) { - xpath_fullXPath.put(distinguishingXPath, fullxpath); + xpath_fullXPath.put(distinguishingXPath.intern(), fullxpath.intern()); } @Override public void putValueAtDPath(String distinguishingXPath, String value) { + distinguishingXPath = distinguishingXPath.intern(); String oldValue = xpath_value.get(distinguishingXPath); xpath_value.put(distinguishingXPath, value); updateValuePathMapping(distinguishingXPath, oldValue, value); diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/StringId.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/StringId.java index 4db909f9b6d..c6caa6d01eb 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/StringId.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/StringId.java @@ -34,7 +34,7 @@ public final class StringId { * @return a value from 0 to 0x7FFFFFFFFFFFFFFFL. */ public static long getId(CharSequence charSequence) { - String string = charSequence.toString(); + String string = charSequence.toString().intern(); Long resultLong = STRING_TO_ID.get(string); if (resultLong != null) { return resultLong; diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/XMLSource.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/XMLSource.java index 4df1bbf5959..6fae02e1d48 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/XMLSource.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/XMLSource.java @@ -261,6 +261,7 @@ public boolean isFrozen() { * @param value */ public String putValueAtPath(String xpath, String value) { + xpath = xpath.intern(); if (locked) { throw new UnsupportedOperationException("Attempt to modify locked object"); } @@ -493,7 +494,8 @@ private TreeMap loadAliases() { if (!Alias.isAliasPath(path)) { continue; } - String fullPath = getFullPathAtDPath(path); + path = path.intern(); + String fullPath = getFullPathAtDPath(path).intern(); Alias temp = Alias.make(fullPath); if (temp == null) { continue; @@ -892,6 +894,7 @@ public Date getChangeDateAtDPath(String xpath) { private String getFullPath( String xpath, AliasLocation fullStatus, String fullPathWhereFound) { String result = null; + xpath = xpath.intern(); if (this.cachingIsEnabled) { result = getFullPathAtDPathCache.get(xpath); } @@ -1078,6 +1081,7 @@ private AliasLocation getPathLocation( boolean skipFirst, boolean skipInheritanceMarker, List list) { + xpath = xpath.intern(); // When calculating the Bailey values, we track the final // return value as firstValue. If non-null, this will become @@ -1145,6 +1149,7 @@ private AliasLocation getPathLocation( aliasedPath = aliases.get(possibleSubpath) + xpath.substring(possibleSubpath.length()); + aliasedPath = aliasedPath.intern(); if (list != null) { // It's an explicit alias, just at a parent element (subset xpath) list.add( @@ -1165,7 +1170,7 @@ private AliasLocation getPathLocation( // alts are special; they act like there is a root alias to the path without the alt. if (aliasedPath == null && xpath.contains("[@alt=")) { - aliasedPath = XPathParts.getPathWithoutAlt(xpath); + aliasedPath = XPathParts.getPathWithoutAlt(xpath).intern(); if (list != null) { list.add( new LocaleInheritanceInfo( @@ -1178,10 +1183,10 @@ private AliasLocation getPathLocation( // //ldml/numbers/currencies/currency[@type="BRZ"]/displayName[@count="other"] => // //ldml/numbers/currencies/currency[@type="BRZ"]/displayName if (aliasedPath == null && xpath.contains("[@count=")) { - aliasedPath = COUNT_EQUALS.matcher(xpath).replaceAll("[@count=\"other\"]"); + aliasedPath = COUNT_EQUALS.matcher(xpath).replaceAll("[@count=\"other\"]").intern(); if (aliasedPath.equals(xpath)) { if (xpath.contains("/displayName")) { - aliasedPath = COUNT_EQUALS.matcher(xpath).replaceAll(""); + aliasedPath = COUNT_EQUALS.matcher(xpath).replaceAll("").intern(); if (aliasedPath.equals(xpath)) { throw new RuntimeException("Internal error"); } @@ -1290,7 +1295,7 @@ private Set findNonAliasedPaths() { // others for (XMLSource curSource : sourceList) { for (String xpath : curSource) { - paths.add(xpath); + paths.add(xpath.intern()); } } return paths; @@ -1328,7 +1333,7 @@ private Set getDirectAliases(String[] paths) { String suffix = xpath.substring(suffixStart); for (String reverseAlias : list) { String reversePath = reverseAlias + suffix; - newPaths.add(reversePath); + newPaths.add(reversePath.intern()); } endIndex++; } @@ -1967,7 +1972,7 @@ public XMLSource add(String currentFullXPath, String value) { LOG_PROGRESS, "ADDING: \t" + currentFullXPath + " \t" + value + "\t" + currentFullXPath); try { - this.putValueAtPath(currentFullXPath, value); + this.putValueAtPath(currentFullXPath.intern(), value); } catch (RuntimeException e) { throw new IllegalArgumentException( "failed adding " + currentFullXPath + ",\t" + value, e); diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/util/TestCLDRFile.java b/tools/cldr-code/src/test/java/org/unicode/cldr/util/TestCLDRFile.java index 4fe5082182e..d30a5bcbd18 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/util/TestCLDRFile.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/util/TestCLDRFile.java @@ -352,4 +352,24 @@ public void testGetPaths() { .getPathsWhereFound(GERMAN_IN_SWITZERLAND)); } } + + @Test + public void TestInternedPaths() { + { + final CLDRFile en = CLDRConfig.getInstance().getCLDRFile("en", false); + for (final String s : en.fullIterable()) { + final String s0 = new String(s); + assertTrue(s != s0); + assertTrue(s == s0.intern(), () -> "in unresolved en was not interned: " + s); + } + } + { + final CLDRFile en = CLDRConfig.getInstance().getCLDRFile("en", true); + for (final String s : en.fullIterable()) { + final String s0 = new String(s); + assertTrue(s != s0); + assertTrue(s == s0.intern(), () -> "in resolved en was not interned: " + s); + } + } + } } From 91f7a7cd496a4e99b053a8d951aa04452e29f5bb Mon Sep 17 00:00:00 2001 From: "Steven R. Loomis" Date: Tue, 23 Jan 2024 08:39:13 -0600 Subject: [PATCH 2/2] CLDR-17314 intern xpaths: update per code review - don't intern in StringId - simplify intern logic with lambdas --- .../java/org/unicode/cldr/util/CLDRFile.java | 90 ++++++++++--------- .../org/unicode/cldr/util/CharUtilities.java | 32 ++----- .../java/org/unicode/cldr/util/StringId.java | 2 +- 3 files changed, 55 insertions(+), 69 deletions(-) diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java index b1a0fd99075..ace78fa059d 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java @@ -55,6 +55,7 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.regex.Matcher; import java.util.regex.Pattern; +import java.util.stream.Collectors; import org.unicode.cldr.test.CheckMetazones; import org.unicode.cldr.util.DayPeriodInfo.DayPeriod; import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature; @@ -3156,8 +3157,8 @@ public Set getRawExtraPaths() { if (extraPaths == null) { extraPaths = ImmutableSet.builder() - .addAll(CharUtilities.internAll(getRawExtraPathsPrivate())) - .addAll(CharUtilities.internAll(CONST_EXTRA_PATHS)) + .addAll(getRawExtraPathsPrivate()) + .addAll(CONST_EXTRA_PATHS) .build(); if (DEBUG) { System.out.println(getLocaleID() + "\textras: " + extraPaths.size()); @@ -3186,7 +3187,7 @@ public Set getRawExtraPaths() { * client code. Make sure that updates here are reflected there and vice versa. *

Reference: https://unicode-org.atlassian.net/browse/CLDR-11238 */ - private Set getRawExtraPathsPrivate() { + private List getRawExtraPathsPrivate() { Set toAddTo = new HashSet<>(); SupplementalDataInfo supplementalData = CLDRConfig.getInstance().getSupplementalDataInfo(); // units @@ -3377,7 +3378,7 @@ private Set getRawExtraPathsPrivate() { } } } - return toAddTo; + return toAddTo.stream().map(String::intern).collect(Collectors.toList()); } private void addPluralCounts( @@ -4307,44 +4308,45 @@ private String getStringValueWithBaileyNotConstructed(String path) { * TestPaths.extraPathAllowsNullValue */ static final Set CONST_EXTRA_PATHS = - ImmutableSet.of( - // Individual zone overrides — were in getRawExtraPaths - "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/generic", - "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/standard", - "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/daylight", - "//ldml/dates/timeZoneNames/zone[@type=\"Europe/Dublin\"]/long/daylight", - "//ldml/dates/timeZoneNames/zone[@type=\"Europe/London\"]/long/daylight", - "//ldml/dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/long/standard", - "//ldml/dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/short/standard", - // Person name paths - "//ldml/personNames/sampleName[@item=\"nativeG\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"nativeGS\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"nativeGS\"]/nameField[@type=\"surname\"]", - "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given2\"]", - "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"surname\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"title\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given-informal\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given2\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname-prefix\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname-core\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname2\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"generation\"]", - "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"credentials\"]", - "//ldml/personNames/sampleName[@item=\"foreignG\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"foreignGS\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"foreignGS\"]/nameField[@type=\"surname\"]", - "//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"given2\"]", - "//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"surname\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"title\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given-informal\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given2\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-prefix\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-core\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname2\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"generation\"]", - "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"credentials\"]"); + CharUtilities.internImmutableSet( + Set.of( + // Individual zone overrides — were in getRawExtraPaths + "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/generic", + "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/standard", + "//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/daylight", + "//ldml/dates/timeZoneNames/zone[@type=\"Europe/Dublin\"]/long/daylight", + "//ldml/dates/timeZoneNames/zone[@type=\"Europe/London\"]/long/daylight", + "//ldml/dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/long/standard", + "//ldml/dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/short/standard", + // Person name paths + "//ldml/personNames/sampleName[@item=\"nativeG\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"nativeGS\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"nativeGS\"]/nameField[@type=\"surname\"]", + "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given2\"]", + "//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"surname\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"title\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given-informal\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given2\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname-prefix\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname-core\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname2\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"generation\"]", + "//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"credentials\"]", + "//ldml/personNames/sampleName[@item=\"foreignG\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"foreignGS\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"foreignGS\"]/nameField[@type=\"surname\"]", + "//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"given2\"]", + "//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"surname\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"title\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given-informal\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given2\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-prefix\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-core\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname2\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"generation\"]", + "//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"credentials\"]")); } diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java index 6bfa75f8968..90effb5c73e 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/CharUtilities.java @@ -1,31 +1,10 @@ package org.unicode.cldr.util; -import java.util.Iterator; +import java.util.Collection; +import java.util.Set; +import java.util.stream.Collectors; public class CharUtilities { - /** intern everything in the src iterable */ - public static Iterable internAll(final Iterable src) { - return new Iterable() { - @Override - public Iterator iterator() { - return CharUtilities.internAll(src.iterator()); - } - }; - } - /** intern everything in the src iterator */ - protected static Iterator internAll(final Iterator iterator) { - return new Iterator() { - @Override - public boolean hasNext() { - return iterator.hasNext(); - } - - @Override - public String next() { - return iterator.next().intern(); - } - }; - } /** * Simple wrapper for CharSequence * @@ -168,4 +147,9 @@ public static int compare(CharSequence text1, CharSequence text2) { } } } + + /** intern each element in the string and return a new unmodifiable Set */ + public static Set internImmutableSet(Collection s) { + return s.stream().map(String::intern).collect(Collectors.toUnmodifiableSet()); + } } diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/StringId.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/StringId.java index c6caa6d01eb..4db909f9b6d 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/StringId.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/StringId.java @@ -34,7 +34,7 @@ public final class StringId { * @return a value from 0 to 0x7FFFFFFFFFFFFFFFL. */ public static long getId(CharSequence charSequence) { - String string = charSequence.toString().intern(); + String string = charSequence.toString(); Long resultLong = STRING_TO_ID.get(string); if (resultLong != null) { return resultLong;