Skip to content

Commit

Permalink
CLDR-15954 Clean up generation software, regenerate the tests, and fi…
Browse files Browse the repository at this point in the history
…x the logical-group error
  • Loading branch information
macchiati committed Mar 18, 2024
1 parent 16131bb commit 2884647
Show file tree
Hide file tree
Showing 5 changed files with 148 additions and 120 deletions.
13 changes: 6 additions & 7 deletions common/testData/units/unitLocalePreferencesTest.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@

# Test data for unit locale preferences
# Copyright © 1991-2024 Unicode, Inc.
# For terms of use, see http://www.unicode.org/copyright.html
Expand All @@ -16,24 +15,24 @@
#
# The input and expected output units are unit identifiers; in particular, the output does not have further processing:
# • no localization

fahrenheit; 1; default; en-u-rg-uszzzz-ms-ussystem-mu-celsius; celsius; -155/9 # mu > ms > rg > (likely) region
#
fahrenheit; 1; default; en-u-rg-uszzzz-ms-ussystem-mu-celsius; celsius; -155/9 # mu > ms > rg > (likely) region
fahrenheit; 1; default; en-u-rg-uszzzz-ms-ussystem-mu-celsius; celsius; -155/9
fahrenheit; 1; default; en-u-rg-uszzzz-ms-metric; celsius; -155/9
fahrenheit; 1; default; en-u-rg-dezzzz; celsius; -155/9
fahrenheit; 1; default; en-DE; celsius; -155/9 # explicit region > likely region
fahrenheit; 1; default; en-DE; celsius; -155/9 # explicit region > likely region
fahrenheit; 1; default; en-US; fahrenheit; 1
fahrenheit; 1; default; en; fahrenheit; 1 # likely region = US
fahrenheit; 1; default; en; fahrenheit; 1 # likely region = US
gallon-imperial; 2.5; fluid; en-u-rg-uszzzz-ms-metric; liter; 11.365225
gallon-imperial; 2.5; fluid; en-u-rg-dezzzz; liter; 11.365225
gallon-imperial; 2.5; fluid; en-DE; liter; 11.365225
gallon-imperial; 2.5; fluid; en-US-u-rg-uszzzz-ms-uksystem; gallon-imperial; 2.5 # ms-uksystem should behave like GB
gallon-imperial; 2.5; fluid; en-US-u-rg-uszzzz-ms-uksystem; gallon-imperial; 2.5 # ms-uksystem should behave like GB
gallon-imperial; 2.5; fluid; en-u-rg-gbzzzz; gallon-imperial; 2.5
gallon-imperial; 2.5; fluid; en-GB; gallon-imperial; 2.5
gallon-imperial; 2.5; fluid; en-u-rg-uszzzz-ms-ussystem; gallon; 1,420,653,125/473176473
gallon-imperial; 2.5; fluid; en-u-rg-uszzzz; gallon; 1,420,653,125/473176473
gallon-imperial; 2.5; fluid; en-US; gallon; 1,420,653,125/473176473
gallon-imperial; 2.5; fluid; en; gallon; 1,420,653,125/473176473 # likely region = US
gallon-imperial; 2.5; fluid; en; gallon; 1,420,653,125/473176473 # likely region = US
ampere; 2.5; default; en; ampere; 2.5 # an input unit whose quantity has no preference data should get base units
pound-force-foot; 12,345; default; en; kilowatt-hour; 0.004649325714486427205
kilocandela; 1; default; en; candela; 1,000 # an input unit whose quantity has no preference data should get base units
Expand Down
4 changes: 2 additions & 2 deletions common/testData/units/unitPreferencesTest.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# Test data for unit preferences
# Copyright © 1991-2024 Unicode, Inc.
# For terms of use, see http://www.unicode.org/copyright.html
# SPDX-License-Identifier: Unicode-DFS-2016
# SPDX-License-Identifier: Unicode-3.0
# CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
#
# Format:
Expand All @@ -22,7 +22,7 @@
# • no formatting with the skeleton
# • no suppression of zero values (for secondary -and- units such as pound in stone-and-pound)
#
# Generation: Set GENERATE_TESTS in TestUnits.java to regenerate unitPreferencesTest.txt.
# Generation: Use GenerateUnitTestData.java to regenerate unitPreferencesTest.txt.

area; default; 001; 1100000; 1100000.0; square-meter; 11/10; 1.1; square-kilometer
area; default; 001; 1000000; 1000000.0; square-meter; 1; 1.0; square-kilometer
Expand Down
4 changes: 2 additions & 2 deletions common/testData/units/unitsTest.txt
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Test data for unit conversions
# Copyright © 1991-2024 Unicode, Inc.
# For terms of use, see http://www.unicode.org/copyright.html
# SPDX-License-Identifier: Unicode-DFS-2016
# SPDX-License-Identifier: Unicode-3.0
# CLDR data files are interpreted according to the LDML specification (http://unicode.org/reports/tr35/)
#
# Format:
Expand All @@ -12,7 +12,7 @@
# round to 4 decimal digits before comparing.
# Note that certain conversions are approximate, such as degrees to radians
#
# Generation: Set GENERATE_TESTS in TestUnits.java to regenerate unitsTest.txt.
# Generation: Use GenerateUnitTestData.java to regenerate unitsTest.txt.

acceleration ; meter-per-square-second ; meter-per-square-second ; 1 * x ; 1,000.00
acceleration ; g-force ; meter-per-square-second ; 9.80665 * x ; 9806.65
Expand Down
Original file line number Diff line number Diff line change
@@ -1,34 +1,35 @@
package org.unicode.cldr.tool;

import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Multimap;
import com.google.common.collect.TreeMultimap;
import com.ibm.icu.number.LocalizedNumberFormatter;
import com.ibm.icu.number.NumberFormatter;
import com.ibm.icu.number.NumberFormatter.UnitWidth;
import com.ibm.icu.number.UnlocalizedNumberFormatter;
import com.ibm.icu.util.Measure;
import com.ibm.icu.util.MeasureUnit;
import com.ibm.icu.util.Output;
import com.ibm.icu.util.ULocale;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.math.BigInteger;
import java.math.MathContext;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.unicode.cldr.util.CLDRLocale;
import java.util.regex.Pattern;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.Pair;
import org.unicode.cldr.util.Rational;
import org.unicode.cldr.util.Rational.FormatStyle;
import org.unicode.cldr.util.StandardCodes.LstrType;
import org.unicode.cldr.util.SupplementalDataInfo;
import org.unicode.cldr.util.TempPrintWriter;
Expand All @@ -55,7 +56,7 @@ public static void main(String[] args) {
GenerateUnitTestData item = new GenerateUnitTestData();
item.TestParseUnit();
item.TestUnitPreferences();
item.testIcu();
item.generateUnitLocalePreferences();
}

static {
Expand Down Expand Up @@ -136,110 +137,148 @@ public void TestParseUnit() {

public void TestUnitPreferences() {
UnitPreferences prefs = SDI.getUnitPreferences();
if (true) {
try (TempPrintWriter pw =
TempPrintWriter.openUTF8Writer(
CLDRPaths.TEST_DATA + "units", "unitPreferencesTest.txt");
TempPrintWriter pwLocale =
TempPrintWriter.openUTF8Writer(
CLDRPaths.TEST_DATA + "units",
"unitLocalePreferencesTest.txt")) {

pw.println(getHeader("Region"));
pwLocale.println(getHeader("Locale"));
Rational ONE_TENTH = Rational.of(1, 10);
try (TempPrintWriter pw =
TempPrintWriter.openUTF8Writer(
CLDRPaths.TEST_DATA + "units", "unitPreferencesTest.txt")) {
pw.println(getHeader("Region"));
Rational ONE_TENTH = Rational.of(1, 10);

// Note that for production usage, precomputed data like the
// prefs.getFastMap(converter) would be used instead of the raw data.
// Note that for production usage, precomputed data like the
// prefs.getFastMap(converter) would be used instead of the raw data.

for (Entry<String, Map<String, Multimap<Set<String>, UnitPreference>>> entry :
prefs.getData().entrySet()) {
String quantity = entry.getKey();
String baseUnit = converter.getBaseUnitFromQuantity(quantity);
for (Entry<String, Multimap<Set<String>, UnitPreference>> entry2 :
entry.getValue().entrySet()) {
String usage = entry2.getKey();
for (Entry<String, Map<String, Multimap<Set<String>, UnitPreference>>> entry :
prefs.getData().entrySet()) {
String quantity = entry.getKey();
String baseUnit = converter.getBaseUnitFromQuantity(quantity);
for (Entry<String, Multimap<Set<String>, UnitPreference>> entry2 :
entry.getValue().entrySet()) {
String usage = entry2.getKey();

// collect samples of base units
for (Entry<Set<String>, Collection<UnitPreference>> entry3 :
entry2.getValue().asMap().entrySet()) {
boolean first = true;
Set<Rational> samples = new TreeSet<>(Comparator.reverseOrder());
for (UnitPreference pref : entry3.getValue()) {
final String topUnit =
UnitPreferences.SPLIT_AND
.split(pref.unit)
.iterator()
.next();
if (first) {
samples.add(
converter.convert(
pref.geq.add(ONE_TENTH),
topUnit,
baseUnit,
false));
first = false;
}
samples.add(converter.convert(pref.geq, topUnit, baseUnit, false));
// collect samples of base units
for (Entry<Set<String>, Collection<UnitPreference>> entry3 :
entry2.getValue().asMap().entrySet()) {
boolean first = true;
Set<Rational> samples = new TreeSet<>(Comparator.reverseOrder());
for (UnitPreference pref : entry3.getValue()) {
final String topUnit =
UnitPreferences.SPLIT_AND.split(pref.unit).iterator().next();
if (first) {
samples.add(
converter.convert(
pref.geq.subtract(ONE_TENTH),
topUnit,
baseUnit,
false));
pref.geq.add(ONE_TENTH), topUnit, baseUnit, false));
first = false;
}
// show samples
Set<String> regions = entry3.getKey();
String sampleRegion = regions.iterator().next();
Collection<UnitPreference> uprefs = entry3.getValue();
for (Rational sample : samples) {
showSample(
quantity,
usage,
sampleRegion,
sample,
baseUnit,
uprefs,
pw);
for (String sampleLocale : getSampleLocales(regions)) {
showSample(
quantity,
usage,
sampleLocale,
sample,
samples.add(converter.convert(pref.geq, topUnit, baseUnit, false));
samples.add(
converter.convert(
pref.geq.subtract(ONE_TENTH),
topUnit,
baseUnit,
uprefs,
pwLocale);
}
}
pw.println();
pwLocale.println();
false));
}
// show samples
Set<String> regions = entry3.getKey();
String sampleRegion = regions.iterator().next();
Collection<UnitPreference> uprefs = entry3.getValue();
for (Rational sample : samples) {
showSample(quantity, usage, sampleRegion, sample, baseUnit, uprefs, pw);
}
pw.println();
}
}
}
}
}

static LikelySubtags likely = new LikelySubtags();
public void generateUnitLocalePreferences() {
try (TempPrintWriter pwLocale =
TempPrintWriter.openUTF8Writer(
CLDRPaths.TEST_DATA + "units", "unitLocalePreferencesTest.txt")) {

private Set<String> getSampleLocales(Set<String> regions) {
Set<String> result = new TreeSet<>();
int count = 2;
for (String region : regions) {
if (--count < 0) {
break;
try {
Set<List<Object>> seen = new HashSet<>();
// first copy existing lines
// This includes the header, so modify the old header if changes are needed!
Files.lines(Path.of(CLDRPaths.TEST_DATA + "units/unitLocalePreferencesTest.txt"))
.forEach(line -> formatPwLocale(pwLocale, line, seen));
// TODO: add more lines
formatLocaleLine(
"byte-per-millisecond", Rational.of(123), "default", "en", "", seen);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
}

/**
 * Splits a line into fields on semicolons, treating any whitespace around each semicolon as
 * part of the separator and trimming the resulting fields.
 */
static final Splitter SPLIT_SEMI = Splitter.on(Pattern.compile("\\s*;\\s*")).trimResults();

private void formatPwLocale(TempPrintWriter pwLocale, String rawLine, Set<List<Object>> seen) {
int hashPos = rawLine.indexOf('#');
String line = hashPos < 0 ? rawLine : rawLine.substring(0, hashPos);
String comment = hashPos < 0 ? "" : "#" + rawLine.substring(hashPos + 1);
if (line.isBlank()) {
if (!comment.isBlank()) {
pwLocale.println(comment);
}
String max = likely.maximize("und_" + region);
String lang = CLDRLocale.getInstance(max).getLanguage();
result.add(lang);
result.add("zu_" + region);
return;
}
List<String> parts = SPLIT_SEMI.splitToList(line);

String sourceUnit = parts.get(0);
Rational sourceAmount = Rational.of(parts.get(1));
String usage = parts.get(2);
String languageTag = parts.get(3);
String newLine =
formatLocaleLine(sourceUnit, sourceAmount, usage, languageTag, comment, seen);
if (newLine != null) {
pwLocale.println(newLine);
}
return result;
}

/**
 * Builds one data line for unitLocalePreferencesTest.txt, or returns {@code null} when a line
 * for the same (unit, amount, usage, language tag) combination has already been produced.
 *
 * @param sourceUnit input unit identifier
 * @param sourceAmount amount of {@code sourceUnit} to convert
 * @param usage usage passed to the unit-preference lookup
 * @param languageTag language tag, parsed with {@link ULocale#forLanguageTag(String)}
 * @param comment trailing text appended verbatim after a tab when it is not blank
 * @param seen keys of the lines produced so far; a new key is recorded by this call
 * @return the formatted line, or {@code null} if the combination is a duplicate
 * @throws IllegalArgumentException if no unit preference is found for the input
 */
public String formatLocaleLine(
        String sourceUnit,
        Rational sourceAmount,
        String usage,
        String languageTag,
        String comment,
        Set<List<Object>> seen) {
    final List<Object> bundle = List.of(sourceUnit, sourceAmount, usage, languageTag);
    // Set.add returns false when the key is already present, so it doubles as the duplicate
    // check. The previous test, bundle.contains(seen), asked whether the key contained the
    // set, which is never true, so duplicates were never suppressed.
    if (!seen.add(bundle)) {
        return null;
    }

    UnitPreferences prefs = SDI.getUnitPreferences();
    final ULocale uLocale = ULocale.forLanguageTag(languageTag);
    UnitPreference unitPreference =
            prefs.getUnitPreference(sourceAmount, sourceUnit, usage, uLocale);
    if (unitPreference == null) { // if the quantity isn't found
        throw new IllegalArgumentException(
                String.format(
                        "No unit preferences found for unit: %s, usage: %s, locale:%s",
                        sourceUnit, usage, languageTag));
    }
    final String actualUnit = unitPreference.unit;
    final Rational actualValue = converter.convert(sourceAmount, sourceUnit, actualUnit, false);
    // # input-unit; amount; usage; languageTag; expected-unit; expected-amount # comment
    return String.format(
            "%s;\t%s;\t%s;\t%s;\t%s;\t%s%s",
            sourceUnit,
            sourceAmount.toString(FormatStyle.formatted),
            usage,
            languageTag,
            actualUnit,
            actualValue.toString(FormatStyle.formatted),
            comment.isBlank() ? "" : "\t" + comment);
}

static LikelySubtags likely = new LikelySubtags();

public String getHeader(String regionOrLocale) {
return "\n# Test data for unit preferences\n"
return "\n# Test data for unit region preferences\n"
+ CldrUtility.getCopyrightString("# ")
+ "\n"
+ "#\n"
Expand Down Expand Up @@ -398,19 +437,4 @@ private void checkUnitConvertability(
}
}
}

/**
 * Formats each sample measure with a full-name, "road"-usage number formatter in the sample's
 * locale. The formatted text is only computed; nothing is compared or reported.
 */
private void testIcu() {
    final UnlocalizedNumberFormatter roadFormatter =
            NumberFormatter.with().unitWidth(UnitWidth.FULL_NAME).usage("road");

    // Each row: amount, unit, language tag, placeholder result string.
    final Object[][] cases = {{1d, MeasureUnit.MILE, "en", "result"}};
    for (final Object[] row : cases) {
        final Double amount = (Double) row[0];
        final MeasureUnit measureUnit = (MeasureUnit) row[1];
        final String languageTag = (String) row[2];
        final LocalizedNumberFormatter localized =
                roadFormatter.locale(Locale.forLanguageTag(languageTag));
        // The placeholder from the row is immediately replaced by the formatted output.
        String formatted = (String) row[3];
        formatted = localized.format(new Measure(amount, measureUnit)).toString();
    }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -761,6 +761,10 @@ public static Set<String> getSpecialsToTranslate() {
return INCLUDE_OTHER;
}

/**
 * Unit identifiers that the SI/metric check in {@code UnitsToAddGrammar} does not add to its
 * data, even when the unit's systems overlap {@code UnitSystem.SiOrMetric}.
 * NOTE(review): checks after that one are not visible here and may still add these units;
 * confirm against the full initializer.
 */
public static final Set<String> SUPPRESS_ADDING_GRAMMAR =
ImmutableSet.of(
"dot-per-centimeter", "millimeter-ofhg", "milligram-ofglucose-per-deciliter");

public static final boolean DEBUG = false;
/** Internal class for thread-safety */
static class UnitsToAddGrammar {
Expand All @@ -786,7 +790,8 @@ static class UnitsToAddGrammar {
if (!EXCLUDE_GRAMMAR.contains(shortUnit)) {
Set<UnitSystem> systems = converter.getSystemsEnum(shortUnit);
// we now add all SI and metric and si_acceptable and metric_adjacent
if (!Collections.disjoint(systems, UnitSystem.SiOrMetric)) {
if (!Collections.disjoint(systems, UnitSystem.SiOrMetric)
&& !SUPPRESS_ADDING_GRAMMAR.contains(unit)) {
_data.add(unit);
continue;
}
Expand Down

0 comments on commit 2884647

Please sign in to comment.