Skip to content

Commit

Permalink
Start adding shimmed properties
Browse files Browse the repository at this point in the history
  • Loading branch information
macchiati committed Jan 10, 2024
1 parent fa900d8 commit 8da539a
Show file tree
Hide file tree
Showing 2 changed files with 172 additions and 25 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
package org.unicode.props;

import com.ibm.icu.dev.util.UnicodeMap;
import com.ibm.icu.text.UTF16;
import com.ibm.icu.text.UnicodeSet;
import com.ibm.icu.text.UnicodeSet.EntryRange;
import org.unicode.props.UnicodeProperty.BaseProperty;

/**
 * A property factory that re-exports every property of another factory, replacing a few of them
 * with "shimmed" copies whose values have been adjusted.
 *
 * <p>The adjusted properties are:
 *
 * <ul>
 *   <li>{@code Joining_Type}: code points in {@code [[:Cf:][:Me:][:Mn:]]} whose value is {@code
 *       Non_Joining} are changed to {@code Transparent}.
 *   <li>{@code Bidi_Mirroring_Glyph}: code points outside the source map's key set are mapped to
 *       themselves.
 *   <li>{@code Bidi_Paired_Bracket}: code points with a null value are mapped to the single
 *       character U+0000.
 * </ul>
 *
 * All other properties are added unchanged.
 */
public class ShimUnicodePropertyFactory extends UnicodeProperty.Factory {

    /**
     * Builds the shimmed factory from the properties available in {@code factory}.
     *
     * @param factory the source of the properties to copy or adjust
     */
    public ShimUnicodePropertyFactory(UnicodeProperty.Factory factory) {
        for (String propName : factory.getAvailableNames()) {
            UnicodeProperty prop = factory.getProperty(propName);
            switch (propName) {
                case "Joining_Type":
                    prop = modifyJoining_Type(prop);
                    break;
                case "Bidi_Mirroring_Glyph":
                    prop = modifyBidi_Mirroring_Glyph(prop);
                    break;
                case "Bidi_Paired_Bracket":
                    prop = modifyBidi_Paired_Bracket(prop);
                    break;
                default:
                    // every other property is passed through as-is
                    break;
            }
            add(prop);
        }
    }

    /**
     * Returns a copy of {@code prop} in which every code point that had a null value is mapped to
     * the one-character string U+0000.
     */
    private UnicodeProperty modifyBidi_Paired_Bracket(UnicodeProperty prop) {
        UnicodeMap<String> map = prop.getUnicodeMap();
        UnicodeMap<String> newMap = new UnicodeMap<>(map);
        UnicodeSet nullValues = map.getSet(null);
        // set all the null values to NUL in one bulk operation instead of one put per code point
        newMap.putAll(nullValues, "\u0000");
        return newProp(prop, newMap);
    }

    /**
     * Returns a copy of {@code prop} in which every code point that is not a key of the source map
     * is mapped to itself.
     */
    private UnicodeProperty modifyBidi_Mirroring_Glyph(UnicodeProperty prop) {
        UnicodeMap<String> map = prop.getUnicodeMap();
        UnicodeMap<String> newMap = new UnicodeMap<>(map);
        // for each range of code points that has no value in the source map
        for (EntryRange range : map.keySet().complement().ranges()) {
            for (int cp = range.codepoint; cp <= range.codepointEnd; ++cp) {
                // set the value to identity
                newMap.put(cp, UTF16.valueOf(cp));
            }
        }
        return newProp(prop, newMap);
    }

    /**
     * Returns a copy of {@code prop} in which code points in {@code [[:Cf:][:Me:][:Mn:]]} that
     * carry the value {@code Non_Joining} are changed to {@code Transparent}.
     */
    private UnicodeProperty modifyJoining_Type(UnicodeProperty prop) {
        UnicodeMap<String> map = new UnicodeMap<>(prop.getUnicodeMap());
        // NOTE(review): this pattern is not resolved through the wrapped factory, so the
        // General_Category data presumably comes from the ICU library on the classpath and may
        // not match the Unicode version of the wrapped properties -- confirm.
        UnicodeSet defaultTransparent = new UnicodeSet("[[:Cf:][:Me:][:Mn:]]");
        for (EntryRange range : defaultTransparent.ranges()) {
            for (int cp = range.codepoint; cp <= range.codepointEnd; ++cp) {
                // Constant-first comparison: map.get(cp) is null for code points that have no
                // entry, and those must simply be left alone rather than cause an NPE.
                if ("Non_Joining".equals(map.get(cp))) {
                    map.put(cp, "Transparent");
                }
            }
        }
        return newProp(prop, map);
    }

    /**
     * Creates a map-backed property that has the name, first name alias, type and version of
     * {@code prop} but takes its values from {@code newMap}.
     *
     * @param prop the property whose identifying metadata is copied
     * @param newMap the values of the new property
     * @return the new property
     */
    public BaseProperty newProp(UnicodeProperty prop, UnicodeMap<String> newMap) {
        return new UnicodeProperty.UnicodeMapProperty()
                .set(newMap)
                .setMain(
                        prop.getName(),
                        prop.getFirstNameAlias(),
                        prop.getType(),
                        prop.getVersion());
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -6,37 +6,56 @@
import com.ibm.icu.dev.util.UnicodeMap;
import com.ibm.icu.impl.Pair;
import com.ibm.icu.text.UnicodeSet;
import java.math.BigDecimal;
import java.util.Collection;
import java.util.LinkedHashSet;
import java.util.Set;
import org.junit.jupiter.api.Test;
import org.unicode.cldr.util.CodePointEscaper;
import org.unicode.cldr.util.Counter;
import org.unicode.cldr.util.Rational;
import org.unicode.cldr.util.SimpleUnicodeSetFormatter;
import org.unicode.props.IndexUnicodeProperties;
import org.unicode.props.ShimUnicodePropertyFactory;
import org.unicode.props.UnicodeProperty;
import org.unicode.text.UCD.Default;
import org.unicode.text.UCD.ToolUnicodePropertySource;
import org.unicode.text.utility.Utility;
import org.unicode.unittest.TestFmwkMinusMinus;

public class TestIndexVsToolUnicodeProperties extends TestFmwkMinusMinus {
private static final int MAX_USET_ITEMS = 15;
public class CheckIndexVsToolUnicodeProperties {
final int MAX_USET_ITEMS = 15;

private static final IndexUnicodeProperties iup =
IndexUnicodeProperties.make(Default.ucdVersion());
final ShimUnicodePropertyFactory iup =
new ShimUnicodePropertyFactory(IndexUnicodeProperties.make(Default.ucdVersion()));

private static final ToolUnicodePropertySource tup =
ToolUnicodePropertySource.make(Default.ucdVersion());
final ToolUnicodePropertySource tup = ToolUnicodePropertySource.make(Default.ucdVersion());

SimpleUnicodeSetFormatter susetFormatter = new SimpleUnicodeSetFormatter();

@Test
// null to skip
final Set<String> debugLimited = null;
//final Set<String> debugLimited = ImmutableSet.of("Bidi_Paired_Bracket");
final UnicodeSet debugItems = new UnicodeSet("[\\x{0}]");

/** Classification of a pair of property values (one per source), as produced by equalsShim. */
enum Shim {
// the two values are equal
equals,
// one side is null while the other is "" or "NaN" (see equalsShim for the exact pairing)
diffDefault,
// numericValueEquals reported the two values as matching
diffNumberFormat,
// none of the above: the values differ
different,
}

/** Command-line entry point: creates a checker and runs the property comparison once. */
public static void main(String[] args) {
    final CheckIndexVsToolUnicodeProperties checker = new CheckIndexVsToolUnicodeProperties();
    checker.TestProperties();
}

public void TestProperties() {

warnln("Comparing values for " + Default.ucdVersion());
warnln("\tComparing values for " + Default.ucdVersion());
Set<String> iupNames = new LinkedHashSet<>(iup.getAvailableNames());
Set<String> tupNames = new LinkedHashSet<>(tup.getAvailableNames());
Set<String> common = Sets.intersection(iupNames, tupNames);
if (debugLimited != null) {
common = debugLimited;
}

Set<String> iupMissing = Sets.difference(tupNames, iupNames);
warnln(
Expand All @@ -53,35 +72,46 @@ public void TestProperties() {
+ Joiner.on(' ').join(tupMissing));

for (String propName : common) {
// warnln(propName);
UnicodeProperty iupProp = iup.getProperty(propName);
UnicodeProperty tupProp = tup.getProperty(propName);
UnicodeSet iupNullTupEmpty = new UnicodeSet();
UnicodeMap<Pair<String, String>> iupDiffTup = new UnicodeMap<>();
Counter<Shim> shims = new Counter<>();

for (int i = 0x0; i <= 0x10ffff; ++i) {
if (debugItems.contains(i)) {
int debug = 0; // stop if debugging
}
String iupValue = iupProp.getValue(i);
String tupValue = tupProp.getValue(i);
if (!Objects.equal(iupValue, tupValue)) {
if (iupValue == null && "".equals(tupValue)
|| iupValue != null
&& "NaN".equals(iupValue.toString())
&& tupValue == null) {
iupNullTupEmpty.add(i);
} else {
iupDiffTup.put(i, Pair.of(showContents(iupValue), showContents(tupValue)));
final Shim shim = equalsShim(propName, iupValue, tupValue);
if (shim != Shim.equals) {
shims.add(shim, 1);

switch (shim) {
case equals:
break;
case diffDefault:
iupNullTupEmpty.add(i);
break;
case diffNumberFormat:
iupNullTupEmpty.add(i);
break;
case different:
equalsShim(propName, iupValue, tupValue);
iupDiffTup.put(
i, Pair.of(showContents(iupValue), showContents(tupValue)));
break;
}
}
}
if (!iupDiffTup.isEmpty()) {
int count = iupDiffTup.size();

final Collection<Pair<String, String>> values = iupDiffTup.getAvailableValues();
int valueCount = 0;
UnicodeSet remaining = new UnicodeSet(iupDiffTup.keySet());
for (Pair<String, String> value : values) {
final UnicodeSet uset = iupDiffTup.getSet(value);
errln("\t" + propName + showLine(uset, value.first, value.second));
errln("\t" + propName + showLine(uset, value.first, value.second, null));
remaining.removeAll(uset);
if (++valueCount > 5) {
errln(
Expand All @@ -91,6 +121,8 @@ public void TestProperties() {
+ remaining.size()
+ "\t"
+ format(remaining, 30)
+ "\t"
+ shims
+ "\tothers");
break;
}
Expand All @@ -100,24 +132,61 @@ public void TestProperties() {
warnln(
"\t"
+ propName
+ showLine(iupNullTupEmpty, showContents(null), showContents("")));
+ showLine(
iupNullTupEmpty,
showContents(null),
showContents(""),
shims));
}
}
}

/**
 * Prints {@code string} to standard output, prefixed with {@code SEVERE}. No separator is
 * inserted between the prefix and the message.
 */
private void errln(String string) {
    final String line = "SEVERE" + string;
    System.out.println(line);
}

/**
 * Prints {@code string} to standard output, prefixed with {@code WARNING}. No separator is
 * inserted between the prefix and the message.
 */
private void warnln(String string) {
    final String line = "WARNING" + string;
    System.out.println(line);
}

/**
 * Classifies how the two values of one code point relate to each other.
 *
 * @param propName the property being compared; forwarded to the numeric comparison
 * @param iupValue the first value, may be null
 * @param tupValue the second value, may be null
 * @return {@code equals} when the values are equal; {@code diffDefault} when the first is null
 *     and the second is the empty string, or the first is "NaN" and the second is null; {@code
 *     diffNumberFormat} when the numeric comparison accepts the pair; otherwise {@code
 *     different}
 */
private Shim equalsShim(String propName, String iupValue, String tupValue) {
    if (Objects.equal(iupValue, tupValue)) {
        return Shim.equals;
    }

    final boolean nullVersusEmpty = iupValue == null && "".equals(tupValue);
    // "NaN".equals(null) is false, so no separate null check is needed here
    final boolean nanVersusNull = "NaN".equals(iupValue) && tupValue == null;
    if (nullVersusEmpty || nanVersusNull) {
        return Shim.diffDefault;
    }

    return numericValueEquals(propName, iupValue, tupValue)
            ? Shim.diffNumberFormat
            : Shim.different;
}

/**
 * Tests whether two {@code Numeric_Value} strings denote approximately the same number even
 * though they are written differently.
 *
 * <p>The first value is parsed by {@code Rational.of(String)}; the second is parsed as a double
 * and converted through {@code BigDecimal}. A missing value on either side, or a second value
 * that is not a finite number, yields {@code false} rather than an exception, so that the
 * caller can classify the pair as different and keep going.
 *
 * @param propName the property being compared; anything other than "Numeric_Value" yields
 *     {@code false}
 * @param iupValue the first value, may be null
 * @param tupValue the second value, may be null
 * @return {@code true} if both values parse and are approximately equal
 */
private boolean numericValueEquals(String propName, String iupValue, String tupValue) {
    if (!propName.equals("Numeric_Value")) {
        return false;
    }
    if (iupValue == null || tupValue == null) {
        // one side has no value at all, so there is nothing numeric to compare
        return false;
    }
    final Rational tupRational;
    try {
        // parseDouble rejects non-numeric text; BigDecimal.valueOf rejects NaN and infinities
        tupRational = Rational.of(BigDecimal.valueOf(Double.parseDouble(tupValue)));
    } catch (NumberFormatException e) {
        return false;
    }
    // NOTE(review): how Rational.of(String) reacts to malformed input is not visible here --
    // confirm whether it also needs guarding.
    final Rational iupRational = Rational.of(iupValue);
    return iupRational.approximatelyEquals(tupRational);
}

/**
 * Renders a property value for display.
 *
 * @param iupValue the value to render, may be null
 * @return "{NULL}" for null, "{EMPTY}" for a blank string, otherwise the result of {@code
 *     format(iupValue)}
 */
public String showContents(String iupValue) {
    if (iupValue == null) {
        return "{NULL}";
    }
    // NOTE(review): isBlank() also matches whitespace-only values, which are then shown as
    // {EMPTY} -- confirm this is intended rather than isEmpty().
    if (iupValue.isBlank()) {
        return "{EMPTY}";
    }
    return format(iupValue);
}

private String showLine(UnicodeSet failures, String iupValue, String tupValue) {
private String showLine(
UnicodeSet failures, String iupValue, String tupValue, Counter<Shim> shims) {
return "\t"
+ failures.size()
+ "\t"
+ format(failures, MAX_USET_ITEMS)
+ "\t"
+ "\tIUP\t"
+ iupValue
+ "\t\tTUP\t"
+ tupValue;
+ tupValue
+ (shims == null ? "" : "\t" + shims);
}

// copied from CLDR, should make public there
Expand Down

0 comments on commit 8da539a

Please sign in to comment.