From 9c666910c7cc40c52cef79274d0643acaa9fac17 Mon Sep 17 00:00:00 2001
From: Peter Edberg <42151464+pedberg-icu@users.noreply.github.com>
Date: Fri, 2 Feb 2024 14:38:12 -0800
Subject: [PATCH] CLDR-16329 Add convertUnit special attribute for beaufort;
update UnitConverter and some related code (#3487)
---
common/dtd/ldmlSupplemental.dtd | 3 +
common/supplemental/units.xml | 2 +
docs/ldml/tr35-info.md | 14 +-
docs/ldml/tr35.md | 5 +-
.../cldr/test/LogicalGroupChecker.java | 4 +
.../cldr/tool/ChartUnitConversions.java | 1 +
.../cldr/tool/UnitValidityCanonicalizer.java | 7 +-
.../cldr/util/SupplementalDataInfo.java | 3 +-
.../org/unicode/cldr/util/UnitConverter.java | 155 ++++++++++++++++--
.../org/unicode/cldr/util/data/PathHeader.txt | 1 +
10 files changed, 175 insertions(+), 20 deletions(-)
diff --git a/common/dtd/ldmlSupplemental.dtd b/common/dtd/ldmlSupplemental.dtd
index b0ffaa2569c..230f1d0c9f7 100644
--- a/common/dtd/ldmlSupplemental.dtd
+++ b/common/dtd/ldmlSupplemental.dtd
@@ -457,6 +457,9 @@ CLDR data files are interpreted according to the LDML specification (http://unic
+
+
+
diff --git a/common/supplemental/units.xml b/common/supplemental/units.xml
index 7a723d37a13..33d6d68d466 100644
--- a/common/supplemental/units.xml
+++ b/common/supplemental/units.xml
@@ -342,6 +342,8 @@ For terms of use, see http://www.unicode.org/copyright.html
+
+
diff --git a/docs/ldml/tr35-info.md b/docs/ldml/tr35-info.md
index d6476c38a6e..18959b043c1 100644
--- a/docs/ldml/tr35-info.md
+++ b/docs/ldml/tr35-info.md
@@ -862,14 +862,16 @@ An implementation need not use rationals directly for conversion; it could use d
-
+
+
+
```
The conversion data provides the data for converting all of the cldr unit identifiers to base units, and back. That allows conversion between any two convertible units, such as two units of length. For any two convertible units (such as acre and dunum) the first can be converted to the base unit (square-meter), then that base unit can be converted to the second unit.
-The data is expressed as conversions to the base unit. The information can also be used for the conversion back.
+The data is expressed as conversions to the base unit from the source unit. The information can also be used for the conversion back.
Examples:
@@ -897,6 +899,14 @@ The factor and offset can be simple expressions, just like the values in the uni
Where a factor is not present, the value is 1; where an offset is not present, the value is 0.
+Instead of using `factor` and possibly `offset`, the `convertUnit` element can specify a `special` conversion that cannot be described by factor and offset (and this attribute cannot be used in conjunction with factor and offset). For example:
+
+```xml
+
+```
+
+The only `special` conversion currently supported is for beaufort.
+
The `systems` attribute indicates the measurement system(s) or other characteristics of a set of unts. Multiple values may be given; for example, a unit could be marked as systems="`si_acceptable` `metric_adjacent` `prefixable`".
The allowed attributes are the following:
diff --git a/docs/ldml/tr35.md b/docs/ldml/tr35.md
index 390e3705384..67dd5479606 100644
--- a/docs/ldml/tr35.md
+++ b/docs/ldml/tr35.md
@@ -5,7 +5,7 @@
|Version|45 (draft)|
|-------|----------|
|Editors|Mark Davis (markdavis@google.com) and other CLDR committee members|
-|Date|2024-01-14|
+|Date|2024-01-31|
|This Version|https://www.unicode.org/reports/tr35/tr35-72/tr35.html|
|Previous Version|https://www.unicode.org/reports/tr35/tr35-71/tr35.html|
|Latest Version|https://www.unicode.org/reports/tr35/|
@@ -4021,6 +4021,9 @@ Other contributors to CLDR are listed on the [CLDR Project Page](https://www.uni
* [Numbers](tr35-numbers.md#Contents)
* In [Supplemental Currency Data](tr35-numbers.md#Supplemental_Currency_Data), for the `currency` element, added attributes `tz` and `to-tz` to clarify the `from` and `to` dates.
+* [Supplemental](tr35-info.md#Contents)
+ * In [Conversion Data](tr35-info.md#conversion-data), added the `special` attribute for `convertUnit`, used for handling beaufort.
+
**Differences from LDML Version 43 to 44.1**
* [Core](#Contents)
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/test/LogicalGroupChecker.java b/tools/cldr-code/src/main/java/org/unicode/cldr/test/LogicalGroupChecker.java
index 7400d5f4e7f..0fcb44d48bc 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/test/LogicalGroupChecker.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/test/LogicalGroupChecker.java
@@ -161,6 +161,10 @@ private void getMissingRequiredPaths() {
if (optionalPaths.contains(apath)) {
continue;
}
+ if (apath.contains("beaufort")) {
+ // TODO CLDR-17352 Missing grammatical inflections for unit Beaufort in many locales
+ continue;
+ }
if (presentPaths.contains(apath)) {
groupHasOneOrMorePresentRequiredPaths = true;
} else {
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartUnitConversions.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartUnitConversions.java
index 8c5a390e878..2466cef0ae7 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartUnitConversions.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/ChartUnitConversions.java
@@ -139,6 +139,7 @@ public void writeContents(FormattedFileWriter pw) throws IOException {
all.add(sortKey);
// get some formatted strings
+ // TODO: handle specials here, CLDR-16329 additional PR or follow-on ticket
final String repeatingFactor =
targetInfo.unitInfo.factor.toString(FormatStyle.repeating);
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/UnitValidityCanonicalizer.java b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/UnitValidityCanonicalizer.java
index 6ccb414d2b8..4b8c7b4652e 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/tool/UnitValidityCanonicalizer.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/tool/UnitValidityCanonicalizer.java
@@ -89,12 +89,7 @@ public int compare(String o1, String o2) {
Output metricUnit2 = new Output<>();
ConversionInfo ci2 = uc.parseUnitId(tc2.core, metricUnit2, false);
- comp = ci1.factor.compareTo(ci2.factor);
- if (comp != 0) {
- return comp;
- }
-
- comp = ci2.offset.compareTo(ci2.offset);
+ comp = ci1.compareTo(ci2);
if (comp != 0) {
return comp;
}
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java
index 2e0314a0c0f..e4e0438b8e6 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/SupplementalDataInfo.java
@@ -1645,8 +1645,9 @@ private boolean handleUnitConversion(XPathParts parts) {
// }
String factor = parts.getAttributeValue(-1, "factor");
String offset = parts.getAttributeValue(-1, "offset");
+ String special = parts.getAttributeValue(-1, "special");
String systems = parts.getAttributeValue(-1, "systems");
- unitConverter.addRaw(source, target, factor, offset, systems);
+ unitConverter.addRaw(source, target, factor, offset, special, systems);
return true;
}
diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java b/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java
index 47d97ca8c10..3b6fdeeec51 100644
--- a/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java
+++ b/tools/cldr-code/src/main/java/org/unicode/cldr/util/UnitConverter.java
@@ -241,23 +241,105 @@ public UnitConverter cloneAsThawed() {
public static final class ConversionInfo implements Comparable {
public final Rational factor;
public final Rational offset;
+ public String special;
+ public boolean specialInverse; // only used with special
static final ConversionInfo IDENTITY = new ConversionInfo(Rational.ONE, Rational.ZERO);
public ConversionInfo(Rational factor, Rational offset) {
this.factor = factor;
this.offset = offset;
+ this.special = null;
+ this.specialInverse = false;
+ }
+
+ public ConversionInfo(String special, boolean inverse) {
+ this.factor = Rational.ZERO; // if ONE it will be treated as a base unit
+ this.offset = Rational.ZERO;
+ this.special = special;
+ this.specialInverse = inverse;
}
public Rational convert(Rational source) {
+ if (special != null) {
+ if (special.equals("beaufort")) {
+ return (specialInverse)
+ ? baseToScale(source, minMetersPerSecForBeaufort)
+ : scaleToBase(source, minMetersPerSecForBeaufort);
+ }
+ return source;
+ }
return source.multiply(factor).add(offset);
}
public Rational convertBackwards(Rational source) {
+ if (special != null) {
+ if (special.equals("beaufort")) {
+ return (specialInverse)
+ ? scaleToBase(source, minMetersPerSecForBeaufort)
+ : baseToScale(source, minMetersPerSecForBeaufort);
+ }
+ return source;
+ }
return source.subtract(offset).divide(factor);
}
+ private static final Rational[] minMetersPerSecForBeaufort = {
+ // minimum m/s values for each Bft value, plus an extra artificial value
+ // from table in Wikipedia, except for artificial value
+ // since 0 based, max Beaufort value is thus array dimension minus 2
+ Rational.of("0.0"), // 0 Bft
+ Rational.of("0.3"), // 1
+ Rational.of("1.6"), // 2
+ Rational.of("3.4"), // 3
+ Rational.of("5.5"), // 4
+ Rational.of("8.0"), // 5
+ Rational.of("10.8"), // 6
+ Rational.of("13.9"), // 7
+ Rational.of("17.2"), // 8
+ Rational.of("20.8"), // 9
+ Rational.of("24.5"), // 10
+ Rational.of("28.5"), // 11
+ Rational.of("32.7"), // 12
+ Rational.of("36.9"), // 13
+ Rational.of("41.4"), // 14
+ Rational.of("46.1"), // 15
+ Rational.of("51.1"), // 16
+ Rational.of("55.8"), // 17
+ Rational.of("61.4"), // artificial end of range 17 to give reasonable midpoint
+ };
+
+ private Rational scaleToBase(Rational scaleValue, Rational[] minBaseForScaleValues) {
+ BigInteger scaleRound = scaleValue.abs().add(Rational.of(1, 2)).floor();
+ BigInteger scaleMax = BigInteger.valueOf(minBaseForScaleValues.length - 2);
+ if (scaleRound.compareTo(scaleMax) > 0) {
+ scaleRound = scaleMax;
+ }
+ int scaleIndex = scaleRound.intValue();
+ // Return midpoint of range (the final range uses an artificial end to produce
+ // reasonable midpoint)
+ return minBaseForScaleValues[scaleIndex]
+ .add(minBaseForScaleValues[scaleIndex + 1])
+ .divide(Rational.TWO);
+ }
+
+ private Rational baseToScale(Rational baseValue, Rational[] minBaseForScaleValues) {
+ int scaleIndex = Arrays.binarySearch(minBaseForScaleValues, baseValue.abs());
+ if (scaleIndex < 0) {
+ // since our first array entry is 0, this value will always be -2 or less
+ scaleIndex = -scaleIndex - 2;
+ }
+ int scaleMax = minBaseForScaleValues.length - 2;
+ if (scaleIndex > scaleMax) {
+ scaleIndex = scaleMax;
+ }
+ return Rational.of(scaleIndex);
+ }
+
public ConversionInfo invert() {
+ if (special != null) {
+ return new ConversionInfo(special, !specialInverse);
+ }
Rational factor2 = factor.reciprocal();
Rational offset2 =
offset.equals(Rational.ZERO) ? Rational.ZERO : offset.divide(factor).negate();
@@ -271,12 +353,15 @@ public String toString() {
}
public String toString(String unit) {
+ if (special != null) {
+ return "special" + (specialInverse ? "inv" : "") + ":" + special + "(" + unit + ")";
+ }
return factor.toString(FormatStyle.formatted)
+ " * "
+ unit
+ (offset.equals(Rational.ZERO)
? ""
- : (offset.compareTo(Rational.ZERO) < 0 ? " - " : " - ")
+ : (offset.compareTo(Rational.ZERO) < 0 ? " - " : " + ")
+ offset.abs().toString(FormatStyle.formatted));
}
@@ -285,18 +370,40 @@ public String toDecimal() {
}
public String toDecimal(String unit) {
+ if (special != null) {
+ return "special" + (specialInverse ? "inv" : "") + ":" + special + "(" + unit + ")";
+ }
return factor.toBigDecimal(MathContext.DECIMAL64)
+ " * "
+ unit
+ (offset.equals(Rational.ZERO)
? ""
- : (offset.compareTo(Rational.ZERO) < 0 ? " - " : " - ")
+ : (offset.compareTo(Rational.ZERO) < 0 ? " - " : " + ")
+ offset.toBigDecimal(MathContext.DECIMAL64).abs());
}
@Override
public int compareTo(ConversionInfo o) {
+ // All specials sort at the end
int diff;
+ if (special != null) {
+ if (o.special == null) {
+ return 1; // This is special, other is not
+ }
+ // Both are special; compare names
+ if (0 != (diff = special.compareTo(o.special))) {
+ return diff;
+ }
+ // Among specials with the same name, inverses sort later
+ if (specialInverse != o.specialInverse) {
+ return (specialInverse) ? 1 : -1;
+ }
+ return 0;
+ }
+ if (o.special != null) {
+ return -1; // This is not special, other is
+ }
+ // Neither this nor other is special
if (0 != (diff = factor.compareTo(o.factor))) {
return diff;
}
@@ -310,7 +417,7 @@ public boolean equals(Object obj) {
@Override
public int hashCode() {
- return Objects.hash(factor, offset);
+ return Objects.hash(factor, offset, (special == null) ? "" : special);
}
}
@@ -423,11 +530,26 @@ public UnitConverter(RationalParser rationalParser, Validity validity) {
// SHORT_TO_LONG_ID = ImmutableBiMap.copyOf(_SHORT_TO_LONG_ID);
}
- public void addRaw(String source, String target, String factor, String offset, String systems) {
- ConversionInfo info =
- new ConversionInfo(
- factor == null ? Rational.ONE : rationalParser.parse(factor),
- offset == null ? Rational.ZERO : rationalParser.parse(offset));
+ public void addRaw(
+ String source,
+ String target,
+ String factor,
+ String offset,
+ String special,
+ String systems) {
+ ConversionInfo info;
+ if (special != null) {
+ info = new ConversionInfo(special, false);
+ if (factor != null || offset != null) {
+ throw new IllegalArgumentException(
+ "Cannot have factor or offset with special=" + special);
+ }
+ } else {
+ info =
+ new ConversionInfo(
+ factor == null ? Rational.ONE : rationalParser.parse(factor),
+ offset == null ? Rational.ZERO : rationalParser.parse(offset));
+ }
Map args = new LinkedHashMap<>();
if (factor != null) {
args.put("factor", factor);
@@ -435,6 +557,9 @@ public void addRaw(String source, String target, String factor, String offset, S
if (offset != null) {
args.put("offset", offset);
}
+ if (special != null) {
+ args.put("special", special);
+ }
addToSourceToTarget(source, target, info, args, systems);
Continuation.addIfNeeded(source, continuations);
@@ -749,7 +874,10 @@ public ConversionInfo parseUnitId(
}
String baseUnit = info.target;
- value = info.unitInfo.factor.multiply(value);
+ value =
+ (info.unitInfo.special == null)
+ ? info.unitInfo.factor.multiply(value)
+ : info.unitInfo.convert(value);
// if (showYourWork && !info.unitInfo.factor.equals(Rational.ONE))
// System.out.println(showRational("\tfactor: ", info.unitInfo.factor,
// baseUnit));
@@ -763,6 +891,8 @@ public ConversionInfo parseUnitId(
unit = baseUnit;
}
for (int p = 1; p <= power; ++p) {
+ // TODO CLDR-16329 additional PR or follow-on ticket, how to handle special
+ // here?
String title = "";
if (value.equals(Rational.ONE)) {
if (showYourWork) System.out.println("\t(already base unit)");
@@ -783,15 +913,18 @@ public ConversionInfo parseUnitId(
+ numerator.divide(denominator).doubleValue());
}
// create cleaned up target unitid
+ // TODO CLDR-16329 additional PR or follow-on ticket, how to handle special here?
outputUnit.add(continuations, unit, inNumerator, power);
power = 1;
}
}
+ // TODO CLDR-16329 additional PR or follow-on ticket, how to handle special here?
metricUnit.value = outputUnit.toString();
return new ConversionInfo(numerator.divide(denominator), offset);
}
/** Only for use for simple base unit comparison */
+ // Thus we do not need to handle specials here
private class UnitComparator implements Comparator {
// TODO, use order in units.xml
@@ -826,6 +959,7 @@ public int compare(String o1, String o2) {
Comparator UNIT_COMPARATOR = new UnitComparator();
/** Only handles the canonical units; no kilo-, only normalized, etc. */
+ // Thus we do not need to handle specials here
// TODO: optimize
// • the comparators don't have to be fields in this class;
// it is not a static class, so they can be on the converter.
@@ -1989,7 +2123,8 @@ public Map getRelatedExamples(
"foodcalorie",
"nautical-mile",
"mile-scandinavian",
- "knot"));
+ "knot",
+ "beaufort"));
Map result = new TreeMap<>(Comparator.reverseOrder());
diff --git a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/PathHeader.txt b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/PathHeader.txt
index ce325dd3276..21cca8ca4f8 100644
--- a/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/PathHeader.txt
+++ b/tools/cldr-code/src/main/resources/org/unicode/cldr/util/data/PathHeader.txt
@@ -540,6 +540,7 @@
//supplementalData/convertUnits/convertUnit[@source="%A"]/_baseUnit ; Supplemental ; Units ; Convert ; $1-baseUnit ; HIDE
//supplementalData/convertUnits/convertUnit[@source="%A"]/_factor ; Supplemental ; Units ; Convert ; $1-factor ; HIDE
//supplementalData/convertUnits/convertUnit[@source="%A"]/_offset ; Supplemental ; Units ; Convert ; $1-offset ; HIDE
+//supplementalData/convertUnits/convertUnit[@source="%A"]/_special ; Supplemental ; Units ; Convert ; $1-special ; HIDE
//supplementalData/convertUnits/convertUnit[@source="%A"]/_reciprocal ; Supplemental ; Units ; Convert ; $1-reciprocal ; HIDE
//supplementalData/convertUnits/convertUnit[@source="%A"]/_systems ; Supplemental ; Units ; Convert ; $1-systems ; HIDE