Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLDR-17314 intern across multiple locations #3461

Merged
merged 2 commits into from
Jan 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ private synchronized void addXpaths(Set<String> xpaths, Connection conn) throws
* @return the xpath's id (as an Integer)
*/
private synchronized Integer addXpath(String xpath, boolean addIfNotFound, Connection inConn) {
xpath = xpath.intern();
Integer nid = stringToId.get(xpath); // double check
if (nid != null) {
return nid;
Expand Down Expand Up @@ -372,6 +373,7 @@ public final String getById(int id) {
* //ldml/dates/timeZoneNames/zone[@type="America/Guadeloupe"]/short/daylight
*/
public final void setById(int id, String xpath) {
xpath = xpath.intern();
stringToId.put(idToString_put(id, xpath), id);
sidToString.put(getStringID(xpath), xpath);
}
Expand All @@ -383,6 +385,7 @@ public final void setById(int id, String xpath) {
* @return the id, like 692804, for the specified path
*/
public final int getByXpath(String xpath) {
xpath = xpath.intern();
Integer nid = stringToId.get(xpath);
if (nid != null) {
return nid.intValue();
Expand Down
86 changes: 44 additions & 42 deletions tools/cldr-code/src/main/java/org/unicode/cldr/util/CLDRFile.java
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import org.unicode.cldr.test.CheckMetazones;
import org.unicode.cldr.util.DayPeriodInfo.DayPeriod;
import org.unicode.cldr.util.GrammarInfo.GrammaticalFeature;
Expand Down Expand Up @@ -3186,7 +3187,7 @@ public Set<String> getRawExtraPaths() {
* client code. Make sure that updates here are reflected there and vice versa.
* <p>Reference: https://unicode-org.atlassian.net/browse/CLDR-11238
*/
private Set<String> getRawExtraPathsPrivate() {
private List<String> getRawExtraPathsPrivate() {
Set<String> toAddTo = new HashSet<>();
SupplementalDataInfo supplementalData = CLDRConfig.getInstance().getSupplementalDataInfo();
// units
Expand Down Expand Up @@ -3377,7 +3378,7 @@ private Set<String> getRawExtraPathsPrivate() {
}
}
}
return toAddTo;
return toAddTo.stream().map(String::intern).collect(Collectors.toList());
}

private void addPluralCounts(
Expand Down Expand Up @@ -4307,44 +4308,45 @@ private String getStringValueWithBaileyNotConstructed(String path) {
* TestPaths.extraPathAllowsNullValue
*/
static final Set<String> CONST_EXTRA_PATHS =
ImmutableSet.of(
// Individual zone overrides — were in getRawExtraPaths
"//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/generic",
"//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/standard",
"//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/daylight",
"//ldml/dates/timeZoneNames/zone[@type=\"Europe/Dublin\"]/long/daylight",
"//ldml/dates/timeZoneNames/zone[@type=\"Europe/London\"]/long/daylight",
"//ldml/dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/long/standard",
"//ldml/dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/short/standard",
// Person name paths
"//ldml/personNames/sampleName[@item=\"nativeG\"]/nameField[@type=\"given\"]",
"//ldml/personNames/sampleName[@item=\"nativeGS\"]/nameField[@type=\"given\"]",
"//ldml/personNames/sampleName[@item=\"nativeGS\"]/nameField[@type=\"surname\"]",
"//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given\"]",
"//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given2\"]",
"//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"surname\"]",
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"title\"]",
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given\"]",
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given-informal\"]",
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given2\"]",
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname-prefix\"]",
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname-core\"]",
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname2\"]",
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"generation\"]",
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"credentials\"]",
"//ldml/personNames/sampleName[@item=\"foreignG\"]/nameField[@type=\"given\"]",
"//ldml/personNames/sampleName[@item=\"foreignGS\"]/nameField[@type=\"given\"]",
"//ldml/personNames/sampleName[@item=\"foreignGS\"]/nameField[@type=\"surname\"]",
"//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"given\"]",
"//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"given2\"]",
"//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"surname\"]",
"//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"title\"]",
"//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given\"]",
"//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given-informal\"]",
"//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given2\"]",
"//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-prefix\"]",
"//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-core\"]",
"//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname2\"]",
"//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"generation\"]",
"//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"credentials\"]");
CharUtilities.internImmutableSet(
Set.of(
// Individual zone overrides — were in getRawExtraPaths
"//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/generic",
"//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/standard",
"//ldml/dates/timeZoneNames/zone[@type=\"Pacific/Honolulu\"]/short/daylight",
"//ldml/dates/timeZoneNames/zone[@type=\"Europe/Dublin\"]/long/daylight",
"//ldml/dates/timeZoneNames/zone[@type=\"Europe/London\"]/long/daylight",
"//ldml/dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/long/standard",
"//ldml/dates/timeZoneNames/zone[@type=\"Etc/UTC\"]/short/standard",
// Person name paths
"//ldml/personNames/sampleName[@item=\"nativeG\"]/nameField[@type=\"given\"]",
"//ldml/personNames/sampleName[@item=\"nativeGS\"]/nameField[@type=\"given\"]",
"//ldml/personNames/sampleName[@item=\"nativeGS\"]/nameField[@type=\"surname\"]",
"//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given\"]",
"//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"given2\"]",
"//ldml/personNames/sampleName[@item=\"nativeGGS\"]/nameField[@type=\"surname\"]",
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"title\"]",
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given\"]",
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given-informal\"]",
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"given2\"]",
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname-prefix\"]",
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname-core\"]",
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"surname2\"]",
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"generation\"]",
"//ldml/personNames/sampleName[@item=\"nativeFull\"]/nameField[@type=\"credentials\"]",
"//ldml/personNames/sampleName[@item=\"foreignG\"]/nameField[@type=\"given\"]",
"//ldml/personNames/sampleName[@item=\"foreignGS\"]/nameField[@type=\"given\"]",
"//ldml/personNames/sampleName[@item=\"foreignGS\"]/nameField[@type=\"surname\"]",
"//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"given\"]",
"//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"given2\"]",
"//ldml/personNames/sampleName[@item=\"foreignGGS\"]/nameField[@type=\"surname\"]",
"//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"title\"]",
"//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given\"]",
"//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given-informal\"]",
"//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"given2\"]",
"//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-prefix\"]",
"//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname-core\"]",
"//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"surname2\"]",
"//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"generation\"]",
"//ldml/personNames/sampleName[@item=\"foreignFull\"]/nameField[@type=\"credentials\"]"));
}
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
package org.unicode.cldr.util;

public class CharUtilities {
import java.util.Collection;
import java.util.Set;
import java.util.stream.Collectors;

public class CharUtilities {
/**
* Simple wrapper for CharSequence
*
Expand Down Expand Up @@ -144,4 +147,9 @@ public static int compare(CharSequence text1, CharSequence text2) {
}
}
}

/** intern each element in the string and return a new unmodifiable Set */
public static Set<String> internImmutableSet(Collection<String> s) {
return s.stream().map(String::intern).collect(Collectors.toUnmodifiableSet());
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -108,11 +108,12 @@ public XMLSource cloneAsThawed() {

@Override
public void putFullPathAtDPath(String distinguishingXPath, String fullxpath) {
xpath_fullXPath.put(distinguishingXPath, fullxpath);
xpath_fullXPath.put(distinguishingXPath.intern(), fullxpath.intern());
}

@Override
public void putValueAtDPath(String distinguishingXPath, String value) {
distinguishingXPath = distinguishingXPath.intern();
String oldValue = xpath_value.get(distinguishingXPath);
xpath_value.put(distinguishingXPath, value);
updateValuePathMapping(distinguishingXPath, oldValue, value);
Expand Down
19 changes: 12 additions & 7 deletions tools/cldr-code/src/main/java/org/unicode/cldr/util/XMLSource.java
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,7 @@ public boolean isFrozen() {
* @param value
*/
public String putValueAtPath(String xpath, String value) {
xpath = xpath.intern();
if (locked) {
throw new UnsupportedOperationException("Attempt to modify locked object");
}
Expand Down Expand Up @@ -493,7 +494,8 @@ private TreeMap<String, String> loadAliases() {
if (!Alias.isAliasPath(path)) {
continue;
}
String fullPath = getFullPathAtDPath(path);
path = path.intern();
String fullPath = getFullPathAtDPath(path).intern();
Alias temp = Alias.make(fullPath);
if (temp == null) {
continue;
Expand Down Expand Up @@ -892,6 +894,7 @@ public Date getChangeDateAtDPath(String xpath) {
private String getFullPath(
String xpath, AliasLocation fullStatus, String fullPathWhereFound) {
String result = null;
xpath = xpath.intern();
if (this.cachingIsEnabled) {
result = getFullPathAtDPathCache.get(xpath);
}
Expand Down Expand Up @@ -1078,6 +1081,7 @@ private AliasLocation getPathLocation(
boolean skipFirst,
boolean skipInheritanceMarker,
List<LocaleInheritanceInfo> list) {
xpath = xpath.intern();

// When calculating the Bailey values, we track the final
// return value as firstValue. If non-null, this will become
Expand Down Expand Up @@ -1145,6 +1149,7 @@ private AliasLocation getPathLocation(
aliasedPath =
aliases.get(possibleSubpath)
+ xpath.substring(possibleSubpath.length());
aliasedPath = aliasedPath.intern();
if (list != null) {
// It's an explicit alias, just at a parent element (subset xpath)
list.add(
Expand All @@ -1165,7 +1170,7 @@ private AliasLocation getPathLocation(

// alts are special; they act like there is a root alias to the path without the alt.
if (aliasedPath == null && xpath.contains("[@alt=")) {
aliasedPath = XPathParts.getPathWithoutAlt(xpath);
aliasedPath = XPathParts.getPathWithoutAlt(xpath).intern();
if (list != null) {
list.add(
new LocaleInheritanceInfo(
Expand All @@ -1178,10 +1183,10 @@ private AliasLocation getPathLocation(
// //ldml/numbers/currencies/currency[@type="BRZ"]/displayName[@count="other"] =>
// //ldml/numbers/currencies/currency[@type="BRZ"]/displayName
if (aliasedPath == null && xpath.contains("[@count=")) {
aliasedPath = COUNT_EQUALS.matcher(xpath).replaceAll("[@count=\"other\"]");
aliasedPath = COUNT_EQUALS.matcher(xpath).replaceAll("[@count=\"other\"]").intern();
if (aliasedPath.equals(xpath)) {
if (xpath.contains("/displayName")) {
aliasedPath = COUNT_EQUALS.matcher(xpath).replaceAll("");
aliasedPath = COUNT_EQUALS.matcher(xpath).replaceAll("").intern();
if (aliasedPath.equals(xpath)) {
throw new RuntimeException("Internal error");
}
Expand Down Expand Up @@ -1290,7 +1295,7 @@ private Set<String> findNonAliasedPaths() {
// others
for (XMLSource curSource : sourceList) {
for (String xpath : curSource) {
paths.add(xpath);
paths.add(xpath.intern());
}
}
return paths;
Expand Down Expand Up @@ -1328,7 +1333,7 @@ private Set<String> getDirectAliases(String[] paths) {
String suffix = xpath.substring(suffixStart);
for (String reverseAlias : list) {
String reversePath = reverseAlias + suffix;
newPaths.add(reversePath);
newPaths.add(reversePath.intern());
}
endIndex++;
}
Expand Down Expand Up @@ -1967,7 +1972,7 @@ public XMLSource add(String currentFullXPath, String value) {
LOG_PROGRESS,
"ADDING: \t" + currentFullXPath + " \t" + value + "\t" + currentFullXPath);
try {
this.putValueAtPath(currentFullXPath, value);
this.putValueAtPath(currentFullXPath.intern(), value);
} catch (RuntimeException e) {
throw new IllegalArgumentException(
"failed adding " + currentFullXPath + ",\t" + value, e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -352,4 +352,24 @@ public void testGetPaths() {
.getPathsWhereFound(GERMAN_IN_SWITZERLAND));
}
}

@Test
public void TestInternedPaths() {
{
final CLDRFile en = CLDRConfig.getInstance().getCLDRFile("en", false);
for (final String s : en.fullIterable()) {
final String s0 = new String(s);
assertTrue(s != s0);
assertTrue(s == s0.intern(), () -> "in unresolved en was not interned: " + s);
}
}
{
final CLDRFile en = CLDRConfig.getInstance().getCLDRFile("en", true);
for (final String s : en.fullIterable()) {
final String s0 = new String(s);
assertTrue(s != s0);
assertTrue(s == s0.intern(), () -> "in resolved en was not interned: " + s);
}
}
}
}
Loading