diff --git a/common/supplemental/units.xml b/common/supplemental/units.xml index 33d6d68d466..ebaa8e9d08f 100644 --- a/common/supplemental/units.xml +++ b/common/supplemental/units.xml @@ -255,7 +255,7 @@ For terms of use, see http://www.unicode.org/copyright.html - + diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java index 6fb6961f6c6..694dee2e677 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java @@ -15,6 +15,7 @@ import com.google.common.collect.TreeMultimap; import com.ibm.icu.impl.Row.R2; import com.ibm.icu.lang.UCharacter; +import com.ibm.icu.number.UnlocalizedNumberFormatter; import com.ibm.icu.text.PluralRules; import com.ibm.icu.util.Freezable; import com.ibm.icu.util.Output; @@ -2236,4 +2237,30 @@ public String resolve(String unit) { String resolved = unitId.resolve().toString(); return getStandardUnit(resolved.isBlank() ? unit : resolved); } + + public String format( + final String languageTag, + Rational outputAmount, + final String unit, + UnlocalizedNumberFormatter nf3) { + final CLDRConfig config = CLDRConfig.getInstance(); + Factory factory = config.getCldrFactory(); + int pos = languageTag.indexOf("-u"); + String localeBase = + (pos < 0 ? languageTag : languageTag.substring(0, pos)).replace('-', '_'); + CLDRFile localeFile = factory.make(localeBase, true); + PluralRules pluralRules = + config.getSupplementalDataInfo() + .getPluralRules( + localeBase, com.ibm.icu.text.PluralRules.PluralType.CARDINAL); + String pluralCategory = pluralRules.select(outputAmount.doubleValue()); + String path = + UnitPathType.unit.getTranslationPath( + localeFile, "long", unit, pluralCategory, "nominative", "neuter"); + String pattern = localeFile.getStringValue(path); + final ULocale uLocale = ULocale.forLanguageTag(languageTag); + String cldrFormattedNumber = + nf3.locale(uLocale).format(outputAmount.doubleValue()).toString(); + return com.ibm.icu.text.MessageFormat.format(pattern, cldrFormattedNumber); + } } diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitPreferences.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitPreferences.java index 7eac1dd6270..79ccb40a39c 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitPreferences.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitPreferences.java @@ -2,19 +2,25 @@ import com.google.common.base.Joiner; import com.google.common.base.Splitter; +import com.google.common.base.Supplier; +import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableSet; import com.google.common.collect.LinkedHashMultimap; import com.google.common.collect.Multimap; -import com.ibm.icu.impl.locale.XCldrStub.ImmutableMap; import com.ibm.icu.util.Freezable; +import com.ibm.icu.util.Output; +import com.ibm.icu.util.ULocale; import java.util.Collection; import java.util.LinkedHashMap; +import java.util.Locale; import java.util.Map; import java.util.Map.Entry; import java.util.Objects; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; +import org.unicode.cldr.tool.LikelySubtags; +import org.unicode.cldr.util.UnitConverter.ConversionInfo; public class UnitPreferences implements Freezable { Map, UnitPreference>>> quantityToUsageToRegionsToInfo = @@ -93,8 +99,9 @@ public void add( Rational newGeq = geq == null || geq.isEmpty() ? Rational.ONE : Rational.of(geq); final UnitPreference newUnitPref = new UnitPreference(newGeq, unit, skeleton); - regionsToInfo.put( - ImmutableSet.copyOf(new TreeSet<>(SPLIT_SPACE.splitToList(regions))), newUnitPref); + final ImmutableSet regionSet = + ImmutableSet.copyOf(new TreeSet<>(SPLIT_SPACE.splitToList(regions))); + boolean old = regionsToInfo.put(regionSet, newUnitPref); } boolean frozen; @@ -195,33 +202,41 @@ public String getPath( * * @return */ - public Map>> getFastMap( - UnitConverter converter) { - Map>> result = new LinkedHashMap<>(); + private Map>> getRawFastMap() { + UnitConverter converter = SupplementalDataInfo.getInstance().getUnitConverter(); + Map>> result = new LinkedHashMap<>(); for (Entry, UnitPreference>>> entry1 : quantityToUsageToRegionsToInfo.entrySet()) { String quantity = entry1.getKey(); - Map> result2 = new LinkedHashMap<>(); + Map> result2 = new LinkedHashMap<>(); result.put(quantity, result2); for (Entry, UnitPreference>> entry2 : entry1.getValue().entrySet()) { String usage = entry2.getKey(); - Map result3 = new LinkedHashMap<>(); + Multimap result3 = LinkedHashMultimap.create(); result2.put(usage, result3); + + // split the regions for (Entry, Collection> entry : entry2.getValue().asMap().entrySet()) { Set regions = entry.getKey(); + int len = entry.getValue().size(); for (UnitPreference up : entry.getValue()) { String unit = SPLIT_AND.split(up.unit).iterator().next(); // first unit quantity = converter.getQuantityFromUnit(unit, false); String baseUnit = converter.getBaseUnitFromQuantity(quantity); - Rational geq = converter.parseRational(String.valueOf(up.geq)); - Rational value = converter.convert(geq, unit, baseUnit, false); - if (value.equals(Rational.NaN)) { - converter.convert(geq, unit, baseUnit, true); // debug + Rational baseGeq; + if (--len == 0) { // set last value to least possible + baseGeq = Rational.NEGATIVE_INFINITY; + } else { + Rational geq = converter.parseRational(String.valueOf(up.geq)); + baseGeq = converter.convert(geq, unit, baseUnit, false); + if (baseGeq.equals(Rational.NaN)) { + converter.convert(geq, unit, baseUnit, true); // debug + } } - UnitPreference up2 = new UnitPreference(value, up.unit, up.skeleton); + UnitPreference up2 = new UnitPreference(baseGeq, up.unit, up.skeleton); for (String region : regions) { result3.put(region, up2); } @@ -229,10 +244,116 @@ public Map>> getFastMap( } } } - return ImmutableMap.copyOf(result); + return CldrUtility.protectCollection(result); + } + + Supplier>>> + quantityToUsageToRegionToInfo = Suppliers.memoize(() -> getRawFastMap()); + + public Map>> getFastMap() { + return quantityToUsageToRegionToInfo.get(); + } + + public UnitPreference getUnitPreference( + Rational sourceAmount, String sourceUnit, String usage, ULocale locale) { + UnitConverter converter = SupplementalDataInfo.getInstance().getUnitConverter(); + sourceUnit = converter.fixDenormalized(sourceUnit); + + String mu = locale.getUnicodeLocaleType("mu"); + // TODO if the value is not a unit, skip + if (mu != null) { + Rational conversion = converter.convert(sourceAmount, sourceUnit, mu, false); + return new UnitPreference(conversion, mu, null); + } + + String quantity = converter.getQuantityFromUnit(sourceUnit, false); + String baseUnit = converter.getBaseUnitFromQuantity(quantity); + + Map> usageToRegionsToInfo = + getFastMap().get(quantity); + + // If there is no quantity among the preferences, + // return the metric UnitPreference + if (usageToRegionsToInfo == null) { + String standardUnit = converter.getStandardUnit(sourceUnit); + if (!sourceUnit.equals(standardUnit)) { + Rational conversion = + converter.convert(sourceAmount, sourceUnit, standardUnit, false); + return new UnitPreference(conversion, standardUnit, null); + } + return new UnitPreference(sourceAmount, sourceUnit, null); + } + + Multimap regionToInfo = usageToRegionsToInfo.get(usage); + + if (regionToInfo == null) { + regionToInfo = usageToRegionsToInfo.get("default"); + } + + // normalize for matching + sourceAmount = sourceAmount.abs(); + if (sourceAmount.equals(Rational.NaN)) { + sourceAmount = Rational.NEGATIVE_ONE; + } + + String region = resolveRegion(locale); + Collection infoList = regionToInfo.get(region); + if (infoList == null || infoList.isEmpty()) { + infoList = regionToInfo.get("001"); + } + + Output baseUnitOutput = new Output<>(); + ConversionInfo sourceConversionInfo = + converter.parseUnitId(sourceUnit, baseUnitOutput, false); + Rational baseValue = sourceConversionInfo.convert(sourceAmount); + + for (UnitPreference info : infoList) { // data is built to always terminate + if (baseValue.compareTo(info.geq) >= 0) { + return info; + } + } + throw new IllegalArgumentException("Fast map should always terminate"); + } + + public String resolveRegion(ULocale locale) { + // https://unicode.org/reports/tr35/tr35-info.html#Unit_Preferences + // en-u-rg-uszzzz-ms-ussystem + String ms = locale.getUnicodeLocaleType("ms"); + if (ms != null) { + switch (ms) { + case "metric": + return "001"; + case "uksystem": + return "GB"; + case "ussystem": + return "US"; + default: + throw new IllegalArgumentException( + "Illegal ms value in: " + locale.toLanguageTag()); + } + } + String rg = locale.getUnicodeLocaleType("rg"); + if (rg != null) { + // TODO: check for illegal rg value + return rg.substring(0, 2).toUpperCase(Locale.ROOT); + } + String region = locale.getCountry(); + if (!region.isEmpty()) { + return region; + } + LikelySubtags LIKELY = new LikelySubtags(); + String maximized = LIKELY.maximize(locale.toLanguageTag()); + if (maximized != null) { + return ULocale.getCountry(maximized); + } + return "001"; } public Set getUsages() { return usages; } + + public Set getQuantities() { + return getFastMap().keySet(); + } } diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestUnits.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestUnits.java index 78111bc1aa4..4a20addbb1b 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestUnits.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestUnits.java @@ -22,10 +22,19 @@ import com.ibm.icu.impl.Row; import com.ibm.icu.impl.Row.R2; import com.ibm.icu.impl.Row.R3; +import com.ibm.icu.number.FormattedNumber; +import com.ibm.icu.number.LocalizedNumberFormatter; +import com.ibm.icu.number.NumberFormatter; +import com.ibm.icu.number.NumberFormatter.UnitWidth; +import com.ibm.icu.number.Precision; +import com.ibm.icu.number.UnlocalizedNumberFormatter; import com.ibm.icu.text.PluralRules; import com.ibm.icu.text.UnicodeSet; import com.ibm.icu.util.ICUUncheckedIOException; +import com.ibm.icu.util.Measure; +import com.ibm.icu.util.MeasureUnit; import com.ibm.icu.util.Output; +import com.ibm.icu.util.ULocale; import java.io.File; import java.io.IOException; import java.io.OutputStreamWriter; @@ -43,6 +52,7 @@ import java.util.LinkedHashMap; import java.util.LinkedHashSet; import java.util.List; +import java.util.Locale; import java.util.Map; import java.util.Map.Entry; import java.util.Objects; @@ -114,6 +124,7 @@ import org.unicode.cldr.util.XPathParts; public class TestUnits extends TestFmwk { + private static final boolean TEST_ICU = System.getProperty("TEST_ICU") != null; private static final Joiner JOIN_COMMA = Joiner.on(", "); @@ -137,7 +148,7 @@ public class TestUnits extends TestFmwk { Validity.getInstance().getStatusToCodes(LstrType.unit).get(Validity.Status.regular); private static final Set DEPRECATED_REGULAR_UNITS = Validity.getInstance().getStatusToCodes(LstrType.unit).get(Validity.Status.deprecated); - private static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance(); + public static final CLDRConfig CLDR_CONFIG = CLDRConfig.getInstance(); private static final Integer INTEGER_ONE = 1; public static boolean getFlag(String flag) { @@ -1779,25 +1790,6 @@ public void TestUnitPreferences() { "If this fails, check the output of TestUnitPreferencesSource (with -DTestUnits:SHOW_DATA), fix as needed, then incorporate."); UnitPreferences prefs = SDI.getUnitPreferences(); checkUnitPreferences(prefs); - // Map>> fastMap = - // prefs.getFastMap(converter); - // for (Entry>> entry : - // fastMap.entrySet()) { - // String quantity = entry.getKey(); - // String baseUnit = converter.getBaseUnitFromQuantity(quantity); - // for (Entry> entry2 : - // entry.getValue().entrySet()) { - // String usage = entry2.getKey(); - // for (Entry entry3 : entry2.getValue().entrySet()) - // { - // String region = entry3.getKey(); - // UnitPreference pref = entry3.getValue(); - // System.out.println(quantity + "\t" + usage + "\t" + region + "\t" + - // pref.toString(baseUnit)); - // } - // } - // } - prefs.getFastMap(converter); // call just to make sure we don't get an exception if (GENERATE_TESTS) { try (TempPrintWriter pw = @@ -4245,4 +4237,299 @@ public boolean assertNotContains( units + ": " + systemSet + " does not contain " + unitSystem, systemSet.contains(unitSystem)); } + + public void testPreferencesWithLocales() { + List> tests = + List.of( + List.of( + 1d, + MeasureUnit.FAHRENHEIT, + "default", + "en-u-rg-uszzzz-ms-ussystem-mu-celsius", + Rational.of(-155, 9), + "celsius"), + List.of( + 1d, + MeasureUnit.FAHRENHEIT, + "default", + "en-u-rg-uszzzz-ms-ussystem-mu-celsius", + Rational.of(-155, 9), + "celsius"), + List.of( + 1d, + MeasureUnit.FAHRENHEIT, + "default", + "en-u-rg-uszzzz-ms-metric", + Rational.of(-155, 9), + "celsius"), + List.of( + 1d, + MeasureUnit.FAHRENHEIT, + "default", + "en-u-rg-dezzzz", + Rational.of(-155, 9), + "celsius"), + List.of( + 1d, + MeasureUnit.FAHRENHEIT, + "default", + "en-DE", + Rational.of(-155, 9), + "celsius"), + List.of( + 1d, + MeasureUnit.FAHRENHEIT, + "default", + "en-US", + Rational.of(1), + "fahrenheit"), + List.of( + 1d, + MeasureUnit.FAHRENHEIT, + "default", + "en", + Rational.of(1), + "fahrenheit"), + List.of( + 2.5d, + MeasureUnit.GALLON_IMPERIAL, + "fluid", + "en-u-rg-uszzzz-ms-metric", + Rational.of(454609, 40000), + "liter"), + List.of( + 2.5d, + MeasureUnit.GALLON_IMPERIAL, + "fluid", + "en-u-rg-dezzzz", + Rational.of(454609, 40000), + "liter"), + List.of( + 2.5d, + MeasureUnit.GALLON_IMPERIAL, + "fluid", + "en-DE", + Rational.of(454609, 40000), + "liter"), + List.of( + 2.5d, + MeasureUnit.GALLON_IMPERIAL, + "fluid", + "en-u-rg-uszzzz-ms-uksystem", + Rational.of(5, 2), + "gallon-imperial"), + List.of( + 2.5d, + MeasureUnit.GALLON_IMPERIAL, + "fluid", + "en-u-rg-gbzzzz", + Rational.of(5, 2), + "gallon-imperial"), + List.of( + 2.5d, + MeasureUnit.GALLON_IMPERIAL, + "fluid", + "en-GB", + Rational.of(5, 2), + "gallon-imperial"), + List.of( + 2.5d, + MeasureUnit.GALLON_IMPERIAL, + "fluid", + "en-u-rg-uszzzz-ms-ussystem", + Rational.of(1420653125, 473176473), + "gallon"), + List.of( + 2.5d, + MeasureUnit.GALLON_IMPERIAL, + "fluid", + "en-u-rg-uszzzz", + Rational.of(1420653125, 473176473), + "gallon"), + List.of( + 2.5d, + MeasureUnit.GALLON_IMPERIAL, + "fluid", + "en-US", + Rational.of(1420653125, 473176473), + "gallon"), + List.of( + 2.5d, + MeasureUnit.GALLON_IMPERIAL, + "fluid", + "en", + Rational.of(1420653125, 473176473), + "gallon"), + List.of( + 2.5d, + MeasureUnit.AMPERE, + "default", + "en", + Rational.of(5, 2), + "ampere"), + List.of( + 12345d, + MeasureUnit.forIdentifier("foot-pound-force"), + "default", + "en", + Rational.of("929865142897285441/200000000000000000000"), + "kilowatt-hour"), + List.of( + 28d, + MeasureUnit.forIdentifier("pound"), + "default", + "en-u-mu-stone", + Rational.TWO, + "stone")); + for (boolean isICU : List.of(false, true)) { + for (List test : tests) { + String actualUnit; + Rational actualValue; + + Double sourceAmount = ((Number) test.get(0)).doubleValue(); + final MeasureUnit sourceUnit = (MeasureUnit) test.get(1); + final String sourceUnitString = Units.getShort(sourceUnit.toString()); + String usage = (String) test.get(2); + final String languageTag = (String) test.get(3); + Rational expectedValue = (Rational) test.get(4); + String expectedUnit = (String) test.get(5); + if (!isICU) { + try { + Rational rationalAmount = Rational.of(sourceAmount.toString()); + UnitPreferences prefs = SDI.getUnitPreferences(); + final ULocale uLocale = ULocale.forLanguageTag(languageTag); + UnitPreference unitPreference1 = + prefs.getUnitPreference( + rationalAmount, sourceUnitString, usage, uLocale); + if (unitPreference1 == null) { // if the quantity isn't found + throw new IllegalArgumentException( + String.format( + "No unit preferences found for unit: %s, usage: %s, locale:%s", + sourceUnitString, usage, languageTag)); + } + UnitPreference unitPreference = unitPreference1; + actualUnit = unitPreference.unit; + actualValue = + converter.convert( + rationalAmount, + sourceUnitString, + unitPreference.unit, + false); + } catch (Exception e1) { + actualUnit = e1.getMessage(); + actualValue = Rational.NaN; + } + if (assertEquals( + "CLDR unit pref" + test.subList(0, test.size() - 1).toString(), + expectedUnit, + actualUnit)) { + assertEquals( + "CLDR value" + test.subList(0, test.size() - 1).toString(), + expectedValue, + actualValue); + } + } else if (TEST_ICU) { + float actualValueFloat; + try { + UnlocalizedNumberFormatter nf = + NumberFormatter.with() + .unitWidth(UnitWidth.FULL_NAME) + .precision(Precision.maxSignificantDigits(20)); + LocalizedNumberFormatter localized = + nf.usage(usage).locale(Locale.forLanguageTag(languageTag)); + final FormattedNumber formatted = + localized.format(new Measure(sourceAmount, sourceUnit)); + MeasureUnit icuOutputUnit = formatted.getOutputUnit(); + actualUnit = icuOutputUnit.getSubtype(); + actualValueFloat = formatted.toBigDecimal().floatValue(); + } catch (Exception e) { + actualUnit = e.getMessage(); + actualValueFloat = Float.NaN; + } + if (assertEquals( + "ICU unit pref" + test.subList(0, test.size() - 1).toString(), + expectedUnit, + actualUnit)) { + assertEquals( + "ICU value" + test.subList(0, test.size() - 1).toString(), + (float) expectedValue.doubleValue(), + actualValueFloat); + } + } + } + } + } + + // private String format(UnlocalizedNumberFormatter nf3, Double sourceAmount, final String + // sourceUnitString, String usage, final String languageTag) { + // Rational rationalAmount = Rational.of(sourceAmount.toString()); + // UnitPreferences prefs = SDI.getUnitPreferences(); + // final ULocale uLocale = ULocale.forLanguageTag(languageTag); + // UnitPreference unitPreference1 = + // prefs.getUnitPreference( + // rationalAmount, + // sourceUnitString, + // usage, + // uLocale); + // if (unitPreference1 == null) { // if the quantity isn't found + // throw new IllegalArgumentException(String.format("No unit preferences found for + // unit: %s, usage: %s, locale:%s", sourceUnitString, usage, languageTag)); + // } + // UnitPreference unitPreference = unitPreference1; + // final String unit = unitPreference.unit; + // Rational outputAmount = + // converter.convert(rationalAmount, sourceUnitString, unit, false); + // return converter.format(languageTag, outputAmount, unit, nf3); + // } + + public void testQuantitiesMissingFromPreferences() { + UnitPreferences prefs = SDI.getUnitPreferences(); + Set preferenceQuantities = prefs.getQuantities(); + Set unitQuantities = converter.getQuantities(); + assertEquals( + "pref - unit quantities", + Collections.emptySet(), + Sets.difference(preferenceQuantities, unitQuantities)); + final SetView quantitiesNotInPreferences = + Sets.difference(unitQuantities, preferenceQuantities); + if (!quantitiesNotInPreferences.isEmpty()) { + warnln("unit - pref quantities = " + quantitiesNotInPreferences); + } + for (String unit : converter.getSimpleUnits()) { + String quantity = converter.getQuantityFromUnit(unit, false); + if (!quantitiesNotInPreferences.contains(quantity)) { + continue; + } + // we have a unit whose quantity is not in preferences + // get its unit preferences + UnitPreference pref = + prefs.getUnitPreference(Rational.ONE, unit, "default", ULocale.US); + if (pref == null) { + errln( + String.format( + "Default preference is null: input unit=%s, quantity=%s", + unit, quantity)); + continue; + } + // ensure that it is metric + Set inputSystems = converter.getSystemsEnum(unit); + if (Collections.disjoint(inputSystems, UnitSystem.SiOrMetric)) { + warnln( + String.format( + "There are no explicit preferences for %s, but %s is not metric", + quantity, unit)); + } + Set systems = converter.getSystemsEnum(pref.unit); + + String errorOrWarningString = + String.format( + "Test default preference is metric: input unit=%s, quantity=%s, pref-unit=%s, systems: %s", + unit, quantity, pref.unit, systems); + if (Collections.disjoint(systems, UnitSystem.SiOrMetric)) { + errln(errorOrWarningString); + } else { + logln("OK " + errorOrWarningString); + } + } + } }