From 8da539a02c70e6b35fd75f4634b4d04d50a8b29e Mon Sep 17 00:00:00 2001
From: macchiati
Date: Tue, 9 Jan 2024 21:32:53 -0800
Subject: [PATCH] Start adding shimmed properties

---
 .../props/ShimUnicodePropertyFactory.java     |  98 ++++++++++++
 ...=> CheckIndexVsToolUnicodeProperties.java} | 132 ++++++++++++++----
 2 files changed, 205 insertions(+), 25 deletions(-)
 create mode 100644 unicodetools/src/main/java/org/unicode/props/ShimUnicodePropertyFactory.java
 rename unicodetools/src/test/java/org/unicode/propstest/{TestIndexVsToolUnicodeProperties.java => CheckIndexVsToolUnicodeProperties.java} (62%)

diff --git a/unicodetools/src/main/java/org/unicode/props/ShimUnicodePropertyFactory.java b/unicodetools/src/main/java/org/unicode/props/ShimUnicodePropertyFactory.java
new file mode 100644
index 000000000..9fa2bbb8a
--- /dev/null
+++ b/unicodetools/src/main/java/org/unicode/props/ShimUnicodePropertyFactory.java
@@ -0,0 +1,98 @@
+package org.unicode.props;
+
+import com.ibm.icu.dev.util.UnicodeMap;
+import com.ibm.icu.text.UTF16;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.text.UnicodeSet.EntryRange;
+import org.unicode.props.UnicodeProperty.BaseProperty;
+
+/**
+ * A property factory that re-exposes every property of another factory, replacing Joining_Type,
+ * Bidi_Mirroring_Glyph and Bidi_Paired_Bracket with adjusted ("shimmed") copies.
+ */
+public class ShimUnicodePropertyFactory extends UnicodeProperty.Factory {
+
+    /**
+     * Adds every property available from {@code factory} to this factory, shimming selected ones.
+     *
+     * @param factory source of the properties to copy
+     */
+    public ShimUnicodePropertyFactory(UnicodeProperty.Factory factory) {
+        for (String propName : factory.getAvailableNames()) {
+            UnicodeProperty prop = factory.getProperty(propName);
+            switch (propName) {
+                case "Joining_Type":
+                    prop = modifyJoining_Type(prop);
+                    break;
+                case "Bidi_Mirroring_Glyph":
+                    prop = modifyBidi_Mirroring_Glyph(prop);
+                    break;
+                case "Bidi_Paired_Bracket":
+                    prop = modifyBidi_Paired_Bracket(prop);
+                    break;
+            }
+            add(prop);
+        }
+    }
+
+    /** Returns a copy of {@code prop} whose null-valued code points map to U+0000 instead. */
+    private UnicodeProperty modifyBidi_Paired_Bracket(UnicodeProperty prop) {
+        UnicodeMap<String> map = prop.getUnicodeMap();
+        UnicodeMap<String> newMap = new UnicodeMap<>(map);
+        UnicodeSet nullValues = map.getSet(null);
+        for (EntryRange range : nullValues.ranges()) {
+            for (int cp = range.codepoint; cp <= range.codepointEnd; ++cp) {
+                // set all the values to NUL
+                newMap.put(cp, "\u0000");
+            }
+        }
+        return newProp(prop, newMap);
+    }
+
+    /** Returns a copy of {@code prop} whose unmapped code points map to themselves instead. */
+    private UnicodeProperty modifyBidi_Mirroring_Glyph(UnicodeProperty prop) {
+        UnicodeMap<String> map = prop.getUnicodeMap();
+        UnicodeMap<String> newMap = new UnicodeMap<>(map);
+        // for each null valued range
+        for (EntryRange range : map.keySet().complement().ranges()) {
+            for (int cp = range.codepoint; cp <= range.codepointEnd; ++cp) {
+                // set all the values to identity
+                newMap.put(cp, UTF16.valueOf(cp));
+            }
+        }
+        return newProp(prop, newMap);
+    }
+
+    /**
+     * Returns a copy of {@code prop} in which code points in [[:Cf:][:Me:][:Mn:]] that have the
+     * value "Non_Joining" get the value "Transparent" instead.
+     */
+    private UnicodeProperty modifyJoining_Type(UnicodeProperty prop) {
+        UnicodeMap<String> map = new UnicodeMap<>(prop.getUnicodeMap());
+        UnicodeSet defaultTransparent = new UnicodeSet("[[:Cf:][:Me:][:Mn:]]");
+        for (EntryRange range : defaultTransparent.ranges()) {
+            for (int cp = range.codepoint; cp <= range.codepointEnd; ++cp) {
+                String oldValue = map.get(cp);
+                // constant first, so a code point with no value (null) is skipped, not an NPE
+                if ("Non_Joining".equals(oldValue)) {
+                    map.put(cp, "Transparent");
+                }
+            }
+        }
+        return newProp(prop, map);
+    }
+
+    /**
+     * Builds a map-backed property holding the values of {@code newMap} under the name, first
+     * name alias, type and version of {@code prop}.
+     */
+    public BaseProperty newProp(UnicodeProperty prop, UnicodeMap<String> newMap) {
+        return new UnicodeProperty.UnicodeMapProperty()
+                .set(newMap)
+                .setMain(
+                        prop.getName(),
+                        prop.getFirstNameAlias(),
+                        prop.getType(),
+                        prop.getVersion());
+    }
+}
diff --git a/unicodetools/src/test/java/org/unicode/propstest/TestIndexVsToolUnicodeProperties.java b/unicodetools/src/test/java/org/unicode/propstest/CheckIndexVsToolUnicodeProperties.java
similarity index 62%
rename from unicodetools/src/test/java/org/unicode/propstest/TestIndexVsToolUnicodeProperties.java
rename to unicodetools/src/test/java/org/unicode/propstest/CheckIndexVsToolUnicodeProperties.java
index e1cb9f005..549bfd5c4 100644
--- a/unicodetools/src/test/java/org/unicode/propstest/TestIndexVsToolUnicodeProperties.java
+++ b/unicodetools/src/test/java/org/unicode/propstest/CheckIndexVsToolUnicodeProperties.java
@@ -6,37 +6,57 @@
 import com.ibm.icu.dev.util.UnicodeMap;
 import com.ibm.icu.impl.Pair;
 import com.ibm.icu.text.UnicodeSet;
+import java.math.BigDecimal;
 import java.util.Collection;
 import java.util.LinkedHashSet;
 import java.util.Set;
-import org.junit.jupiter.api.Test;
 import org.unicode.cldr.util.CodePointEscaper;
+import org.unicode.cldr.util.Counter;
+import org.unicode.cldr.util.Rational;
 import org.unicode.cldr.util.SimpleUnicodeSetFormatter;
 import org.unicode.props.IndexUnicodeProperties;
+import org.unicode.props.ShimUnicodePropertyFactory;
 import org.unicode.props.UnicodeProperty;
 import org.unicode.text.UCD.Default;
 import org.unicode.text.UCD.ToolUnicodePropertySource;
 import org.unicode.text.utility.Utility;
-import org.unicode.unittest.TestFmwkMinusMinus;
 
-public class TestIndexVsToolUnicodeProperties extends TestFmwkMinusMinus {
-    private static final int MAX_USET_ITEMS = 15;
+public class CheckIndexVsToolUnicodeProperties {
+    final int MAX_USET_ITEMS = 15;
 
-    private static final IndexUnicodeProperties iup =
-            IndexUnicodeProperties.make(Default.ucdVersion());
+    final ShimUnicodePropertyFactory iup =
+            new ShimUnicodePropertyFactory(IndexUnicodeProperties.make(Default.ucdVersion()));
 
-    private static final ToolUnicodePropertySource tup =
-            ToolUnicodePropertySource.make(Default.ucdVersion());
+    final ToolUnicodePropertySource tup = ToolUnicodePropertySource.make(Default.ucdVersion());
 
     SimpleUnicodeSetFormatter susetFormatter = new SimpleUnicodeSetFormatter();
 
-    @Test
+    // null to skip
+    final Set<String> debugLimited = null;
+    //final Set<String> debugLimited = ImmutableSet.of("Bidi_Paired_Bracket");
+    final UnicodeSet debugItems = new UnicodeSet("[\\x{0}]");
+
+    /** Outcome of comparing an IUP value with the corresponding TUP value. */
+    enum Shim {
+        equals,
+        diffDefault,
+        diffNumberFormat,
+        different,
+    }
+
+    public static void main(String[] args) {
+        new CheckIndexVsToolUnicodeProperties().TestProperties();
+    }
+
     public void TestProperties() {
-        warnln("Comparing values for " + Default.ucdVersion());
+        warnln("\tComparing values for " + Default.ucdVersion());
 
         Set<String> iupNames = new LinkedHashSet<>(iup.getAvailableNames());
         Set<String> tupNames = new LinkedHashSet<>(tup.getAvailableNames());
         Set<String> common = Sets.intersection(iupNames, tupNames);
+        if (debugLimited != null) {
+            common = debugLimited;
+        }
 
         Set<String> iupMissing = Sets.difference(tupNames, iupNames);
         warnln(
@@ -53,35 +73,45 @@ public void TestProperties() {
                         + Joiner.on(' ').join(tupMissing));
 
         for (String propName : common) {
-            // warnln(propName);
             UnicodeProperty iupProp = iup.getProperty(propName);
             UnicodeProperty tupProp = tup.getProperty(propName);
             UnicodeSet iupNullTupEmpty = new UnicodeSet();
             UnicodeMap<Pair<String, String>> iupDiffTup = new UnicodeMap<>();
+            Counter<Shim> shims = new Counter<>();
             for (int i = 0x0; i <= 0x10ffff; ++i) {
+                if (debugItems.contains(i)) {
+                    int debug = 0; // stop if debugging
+                }
                 String iupValue = iupProp.getValue(i);
                 String tupValue = tupProp.getValue(i);
-                if (!Objects.equal(iupValue, tupValue)) {
-                    if (iupValue == null && "".equals(tupValue)
-                            || iupValue != null
-                                    && "NaN".equals(iupValue.toString())
-                                    && tupValue == null) {
-                        iupNullTupEmpty.add(i);
-                    } else {
-                        iupDiffTup.put(i, Pair.of(showContents(iupValue), showContents(tupValue)));
+                final Shim shim = equalsShim(propName, iupValue, tupValue);
+                if (shim != Shim.equals) {
+                    shims.add(shim, 1);
+
+                    switch (shim) {
+                        case equals:
+                            break;
+                        case diffDefault:
+                            iupNullTupEmpty.add(i);
+                            break;
+                        case diffNumberFormat:
+                            iupNullTupEmpty.add(i);
+                            break;
+                        case different:
+                            iupDiffTup.put(
+                                    i, Pair.of(showContents(iupValue), showContents(tupValue)));
+                            break;
                     }
                 }
             }
 
             if (!iupDiffTup.isEmpty()) {
-                int count = iupDiffTup.size();
-
                 final Collection<Pair<String, String>> values = iupDiffTup.getAvailableValues();
                 int valueCount = 0;
                 UnicodeSet remaining = new UnicodeSet(iupDiffTup.keySet());
                 for (Pair<String, String> value : values) {
                     final UnicodeSet uset = iupDiffTup.getSet(value);
-                    errln("\t" + propName + showLine(uset, value.first, value.second));
+                    errln("\t" + propName + showLine(uset, value.first, value.second, null));
                     remaining.removeAll(uset);
                     if (++valueCount > 5) {
                         errln(
@@ -91,6 +121,8 @@ public void TestProperties() {
                                         + remaining.size()
                                         + "\t"
                                         + format(remaining, 30)
+                                        + "\t"
+                                        + shims
                                         + "\tothers");
                         break;
                     }
@@ -100,24 +132,74 @@ public void TestProperties() {
                 warnln(
                         "\t"
                                 + propName
-                                + showLine(iupNullTupEmpty, showContents(null), showContents("")));
+                                + showLine(
+                                        iupNullTupEmpty,
+                                        showContents(null),
+                                        showContents(""),
+                                        shims));
             }
         }
     }
 
+    private void errln(String string) {
+        System.out.println("SEVERE" + string);
+    }
+
+    private void warnln(String string) {
+        System.out.println("WARNING" + string);
+    }
+
+    /**
+     * Classifies how two values of {@code propName} for one code point relate: equal, differing
+     * only in how a missing value is written (null vs. "", "NaN" vs. null), numerically equal, or
+     * different.
+     */
+    private Shim equalsShim(String propName, String iupValue, String tupValue) {
+        if (Objects.equal(iupValue, tupValue)) {
+            return Shim.equals;
+        } else if (iupValue == null && "".equals(tupValue)
+                || iupValue != null && "NaN".equals(iupValue.toString()) && tupValue == null) {
+            return Shim.diffDefault;
+        } else if (numericValueEquals(propName, iupValue, tupValue)) {
+            return Shim.diffNumberFormat;
+        } else {
+            return Shim.different;
+        }
+    }
+
+    /**
+     * Returns true if {@code propName} is Numeric_Value and both values are approximately the same
+     * number. A null value, or a TUP value that is not a finite number, never compares equal.
+     */
+    private boolean numericValueEquals(String propName, String iupValue, String tupValue) {
+        if (!propName.equals("Numeric_Value") || iupValue == null || tupValue == null) {
+            return false;
+        }
+        try {
+            Rational iupRational = Rational.of(iupValue);
+            Rational tupRational = Rational.of(BigDecimal.valueOf(Double.parseDouble(tupValue)));
+            return iupRational.approximatelyEquals(tupRational);
+        } catch (NumberFormatException e) {
+            // parseDouble rejects non-numeric text; BigDecimal.valueOf rejects NaN and infinity
+            return false;
+        }
+    }
+
     public String showContents(String iupValue) {
         return iupValue == null ? "{NULL}" : iupValue.isBlank() ? "{EMPTY}" : format(iupValue);
     }
 
-    private String showLine(UnicodeSet failures, String iupValue, String tupValue) {
+    private String showLine(
+            UnicodeSet failures, String iupValue, String tupValue, Counter<Shim> shims) {
         return "\t"
                 + failures.size()
                 + "\t"
                 + format(failures, MAX_USET_ITEMS)
+                + "\t"
                 + "\tIUP\t"
                 + iupValue
                 + "\t≠\tTUP\t"
-                + tupValue;
+                + tupValue
+                + (shims == null ? "" : "\t" + shims);
     }
 
     // copied from CLDR, should make public there