diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateConfusablesCopy.java b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateConfusablesCopy.java index 64e468b65..420da88a1 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/GenerateConfusablesCopy.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/GenerateConfusablesCopy.java @@ -777,9 +777,13 @@ private void loadFileData() throws IOException { final String codelist = pieces[0].trim(); final Reason reasons = Reason.fromString(pieces[1]); if (pieces[0].startsWith("[")) { + // TODO(macchiati): Weird dependency on ChainedSymbolTable which we probably + // do not need. sources = - TestUnicodeInvariants.parseUnicodeSet( - codelist); // .retainAll(allocated); + VersionedProperty.parseUnicodeSet( + codelist, + new TestUnicodeInvariants + .ChainedSymbolTable()); // .retainAll(allocated); } else { final String[] codes = Utility.split(codelist, ' '); for (final String code : codes) { diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/IdentifierInfo.java b/unicodetools/src/main/java/org/unicode/text/UCD/IdentifierInfo.java index c3726443b..2c90525ee 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/IdentifierInfo.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/IdentifierInfo.java @@ -435,9 +435,13 @@ private void loadFileData() throws IOException { } final String codelist = pieces[0].trim(); if (UnicodeSet.resemblesPattern(pieces[0], 0)) { + // TODO(macchiati): Weird dependency on ChainedSymbolTable which we probably + // do not need. 
sources = - TestUnicodeInvariants.parseUnicodeSet( - codelist); // .retainAll(allocated); + VersionedProperty.parseUnicodeSet( + codelist, + new TestUnicodeInvariants + .ChainedSymbolTable()); // .retainAll(allocated); if (sources.contains("ᢰ")) { int x = 0; } diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java index 9327e02ff..f664472ba 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/TestUnicodeInvariants.java @@ -20,7 +20,6 @@ import java.util.LinkedHashSet; import java.util.List; import java.util.Map; -import java.util.Set; import java.util.TreeMap; import org.unicode.cldr.draft.FileUtilities; import org.unicode.cldr.util.Tabber; @@ -31,7 +30,6 @@ import org.unicode.props.IndexUnicodeProperties; import org.unicode.props.UnicodeProperty; import org.unicode.props.UnicodeProperty.Factory; -import org.unicode.props.UnicodeProperty.PatternMatcher; import org.unicode.text.utility.Settings; public class TestUnicodeInvariants { @@ -39,9 +37,7 @@ public class TestUnicodeInvariants { // private static final Pattern IN_PATTERN = Pattern.compile("(.*)([≠=])(.*)"); private static final boolean ICU_VERSION = false; // ignore the versions if this is true - private static final String LATEST_VERSION = Settings.latestVersion; // "5.2.0"; // - private static final Factory LATEST_PROPS = getProperties(LATEST_VERSION); - private static final String LAST_VERSION = Settings.lastVersion; // "5.1.0"; // + private static final Factory LATEST_PROPS = getProperties(Settings.latestVersion); private static final boolean SHOW_LOOKUP = false; private static int showRangeLimit = 20; static boolean doHtml = true; @@ -580,7 +576,7 @@ static UnicodeProperty of( if (propName.length() > 0) { final FilterOrProp propOrFilter = new FilterOrProp(); final VersionedProperty xprop = new 
VersionedProperty().set(propName); - propOrFilter.prop = xprop.property; + propOrFilter.prop = xprop.getProperty(); if (propOrFilter.prop == null) { throw new IllegalArgumentException( "Can't create property for: " + propName); @@ -1217,10 +1213,6 @@ private static Factory getProperties(final String version) { return ICU_VERSION ? ICUPropertyFactory.make() : ToolUnicodePropertySource.make(version); } - private static Factory getIndexedProperties(String version2) { - return IndexUnicodeProperties.make(version2); - } - static class ChainedSymbolTable extends UnicodeSet.XSymbolTable { private static final Comparator LONGEST_FIRST = @@ -1289,133 +1281,15 @@ public String parseReference(String text, ParsePosition pos, int limit) { public boolean applyPropertyAlias( String propertyName2, String propertyValue, UnicodeSet result) { result.clear(); - result.addAll(propertyVersion.set(propertyName2).getSet(propertyValue)); + result.addAll( + propertyVersion + .set(propertyName2) + .getSet(propertyValue, symbolTable, symbolTable.variables)); return true; } } - static class VersionedProperty { - private String propertyName; - private String version; - private UnicodeProperty.Factory propSource; - private UnicodeProperty property; - private final transient PatternMatcher matcher = new UnicodeProperty.RegexMatcher(); - - private static final Set TOOL_ONLY_PROPERTIES = - Set.of("toNFC", "toNFD", "toNFKC", "toNFKD"); - - private static boolean isTrivial(UnicodeMap map) { - return map.isEmpty() - || (map.values().size() == 1 - && map.getSet(map.values().iterator().next()) - .equals(UnicodeSet.ALL_CODE_POINTS)); - } - - public VersionedProperty set(String xPropertyName) { - xPropertyName = xPropertyName.trim(); - boolean allowRetroactive = false; - if (xPropertyName.contains(":")) { - final String[] names = xPropertyName.split(":"); - if (names.length != 2) { - throw new IllegalArgumentException("Too many ':' fields in " + xPropertyName); - } - if (names[0].isEmpty()) { - throw new 
IllegalArgumentException("Empty version field in " + xPropertyName); - } - switch (names[0].charAt(0)) { - case 'U': - break; - case 'R': - allowRetroactive = true; - break; - default: - throw new IllegalArgumentException( - "Version field should start with U or R in " + xPropertyName); - } - if (names[0].substring(1).equals("-1")) { - version = LAST_VERSION; - } else { - version = names[0].substring(1); - } - xPropertyName = names[1]; - } else { - version = LATEST_VERSION; - } - ; - propertyName = xPropertyName; - propSource = getIndexedProperties(version); - property = propSource.getProperty(xPropertyName); - if ((property == null && TOOL_ONLY_PROPERTIES.contains(xPropertyName)) - || (isTrivial(property.getUnicodeMap()) && allowRetroactive)) { - propSource = getProperties(version); - property = propSource.getProperty(xPropertyName); - } - if (property == null || isTrivial(property.getUnicodeMap())) { - throw new IllegalArgumentException( - "Can't create property from name: " - + propertyName - + " and version: " - + version); - } - return this; - } - - public UnicodeSet getSet(String propertyValue) { - UnicodeSet set; - if (propertyValue.length() == 0) { - set = property.getSet("true"); - } else if (propertyValue.startsWith("/") && propertyValue.endsWith("/")) { - String body = propertyValue.substring(1, propertyValue.length() - 1); - for (final String variableMinus : symbolTable.variables.keySet()) { - final String variable = "$" + variableMinus; - if (body.contains(variable)) { - final String replacement = - String.copyValueOf(symbolTable.variables.get(variableMinus)); - final UnicodeSet value = parseUnicodeSet(replacement); - final String valueString = - value.complement(0).complement(0).toPattern(false); - body = body.replace(variable, valueString); - } - } - matcher.set(body); - set = property.getSet(matcher); - } else if (propertyValue.equals("∅")) { - set = property.getSet(NULL_MATCHER, null); - } else { - set = property.getSet(propertyValue); - } - return 
set; - } - } - - static final UnicodeProperty.PatternMatcher NULL_MATCHER = - new UnicodeProperty.PatternMatcher() { - @Override - public boolean test(String o) { - return o == null || "".equals(o); - } - - @Override - public PatternMatcher set(String pattern) { - return this; - } - }; - public static UnicodeSet parseUnicodeSet(String line, ParsePosition pp) { return new UnicodeSet(line, pp, symbolTable); } - - public static UnicodeSet parseUnicodeSet(String line) { - final ParsePosition pp = new ParsePosition(0); - final UnicodeSet result = new UnicodeSet(line, pp, symbolTable); - final int lengthUsed = pp.getIndex(); - if (lengthUsed != line.length()) { - throw new IllegalArgumentException( - "Text after end of set: " - + line.substring(0, lengthUsed) - + "XXX" - + line.substring(lengthUsed)); - } - return result; - } }
diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/VersionedProperty.java b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedProperty.java
new file mode 100644
index 000000000..28fba5c14
--- /dev/null
+++ b/unicodetools/src/main/java/org/unicode/text/UCD/VersionedProperty.java
@@ -0,0 +1,192 @@
+package org.unicode.text.UCD;
+
+import com.ibm.icu.dev.util.UnicodeMap;
+import com.ibm.icu.text.SymbolTable;
+import com.ibm.icu.text.UnicodeSet;
+import java.text.ParsePosition;
+import java.util.Map;
+import java.util.Set;
+import org.unicode.props.IndexUnicodeProperties;
+import org.unicode.props.UnicodeProperty;
+import org.unicode.props.UnicodeProperty.Factory;
+import org.unicode.props.UnicodeProperty.PatternMatcher;
+import org.unicode.text.utility.Settings;
+
+/**
+ * A Unicode property looked up by name for a specific Unicode version.
+ *
+ * <p>{@link #set(String)} resolves the property; {@link #getSet} then returns the code points
+ * whose value matches a literal, a {@code /regex/}, or the empty-value marker {@code ∅}.
+ */
+public class VersionedProperty {
+    private String propertyName;
+    private String version;
+    private UnicodeProperty.Factory propSource;
+    private UnicodeProperty property;
+    private final transient PatternMatcher matcher = new UnicodeProperty.RegexMatcher();
+
+    /** Names resolved via {@link ToolUnicodePropertySource} when the indexed lookup is null. */
+    private static final Set<String> TOOL_ONLY_PROPERTIES =
+            Set.of("toNFC", "toNFD", "toNFKC", "toNFKD");
+
+    /** Returns true if {@code map} is empty or assigns one single value to all code points. */
+    private static <T> boolean isTrivial(UnicodeMap<T> map) {
+        return map.isEmpty()
+                || (map.values().size() == 1
+                        && map.getSet(map.values().iterator().next())
+                                .equals(UnicodeSet.ALL_CODE_POINTS));
+    }
+
+    /**
+     * Returns the property most recently looked up by {@link #set(String)}; {@code null} if no
+     * lookup has happened yet or the last lookup found nothing.
+     */
+    public UnicodeProperty getProperty() {
+        return property;
+    }
+
+    /**
+     * Resolves a property from its name, optionally prefixed with a version field.
+     *
+     * <p>Without a {@code ':'} the version is {@code Settings.latestVersion}. Otherwise the name
+     * must be {@code <field>:<name>} where the field is {@code U} or {@code R} followed by a
+     * version; the version {@code -1} stands for {@code Settings.lastVersion}. With {@code R}, a
+     * trivial indexed property is looked up again in {@link ToolUnicodePropertySource}.
+     *
+     * @param xPropertyName the property name, with optional version field; trimmed before use
+     * @return this object, for chaining
+     * @throws IllegalArgumentException if the version field is malformed, or if no non-trivial
+     *     property can be found for the name and version
+     */
+    public VersionedProperty set(String xPropertyName) {
+        xPropertyName = xPropertyName.trim();
+        boolean allowRetroactive = false;
+        if (xPropertyName.contains(":")) {
+            final String[] names = xPropertyName.split(":");
+            if (names.length != 2) {
+                throw new IllegalArgumentException("Too many ':' fields in " + xPropertyName);
+            }
+            if (names[0].isEmpty()) {
+                throw new IllegalArgumentException("Empty version field in " + xPropertyName);
+            }
+            switch (names[0].charAt(0)) {
+                case 'U':
+                    break;
+                case 'R':
+                    allowRetroactive = true;
+                    break;
+                default:
+                    throw new IllegalArgumentException(
+                            "Version field should start with U or R in " + xPropertyName);
+            }
+            final String versionField = names[0].substring(1);
+            version = versionField.equals("-1") ? Settings.lastVersion : versionField;
+            xPropertyName = names[1];
+        } else {
+            version = Settings.latestVersion;
+        }
+        propertyName = xPropertyName;
+        propSource = getIndexedProperties(version);
+        property = propSource.getProperty(xPropertyName);
+        // Pick the fallback without dereferencing a null property: an unknown name that is not
+        // tool-only must reach the IllegalArgumentException below instead of throwing an NPE.
+        final boolean useToolSource =
+                property == null
+                        ? TOOL_ONLY_PROPERTIES.contains(xPropertyName)
+                        : allowRetroactive && isTrivial(property.getUnicodeMap());
+        if (useToolSource) {
+            propSource = ToolUnicodePropertySource.make(version);
+            property = propSource.getProperty(xPropertyName);
+        }
+        if (property == null || isTrivial(property.getUnicodeMap())) {
+            throw new IllegalArgumentException(
+                    "Can't create property from name: "
+                            + propertyName
+                            + " and version: "
+                            + version);
+        }
+        return this;
+    }
+
+    /**
+     * Returns the code points whose property value matches {@code propertyValue}.
+     *
+     * <p>An empty string selects the value {@code "true"}; {@code /body/} is applied as a regular
+     * expression after each {@code $name} from {@code variables} is replaced by the pattern of
+     * the set it denotes; {@code ∅} is matched with {@link #NULL_MATCHER}; anything else is
+     * passed on as a literal value.
+     *
+     * @param propertyValue the value expression
+     * @param symbolTable symbol table used to parse variable contents as UnicodeSets
+     * @param variables variable names (without {@code $}) mapped to their defining text
+     * @return the matching set
+     */
+    public UnicodeSet getSet(
+            String propertyValue, SymbolTable symbolTable, Map<String, char[]> variables) {
+        if (propertyValue.isEmpty()) {
+            return property.getSet("true");
+        }
+        // Require both delimiters: a lone "/" would otherwise make substring(1, 0) throw.
+        if (propertyValue.length() >= 2
+                && propertyValue.startsWith("/")
+                && propertyValue.endsWith("/")) {
+            String body = propertyValue.substring(1, propertyValue.length() - 1);
+            for (final Map.Entry<String, char[]> entry : variables.entrySet()) {
+                final String variable = "$" + entry.getKey();
+                if (body.contains(variable)) {
+                    final String replacement = String.copyValueOf(entry.getValue());
+                    final UnicodeSet value = parseUnicodeSet(replacement, symbolTable);
+                    // NOTE(review): the double complement leaves the contents unchanged; it
+                    // presumably makes toPattern regenerate the pattern text - confirm.
+                    final String valueString = value.complement(0).complement(0).toPattern(false);
+                    body = body.replace(variable, valueString);
+                }
+            }
+            matcher.set(body);
+            return property.getSet(matcher);
+        }
+        if (propertyValue.equals("∅")) {
+            return property.getSet(NULL_MATCHER, null);
+        }
+        return property.getSet(propertyValue);
+    }
+
+    private static Factory getIndexedProperties(String version2) {
+        return IndexUnicodeProperties.make(version2);
+    }
+
+    /**
+     * Parses all of {@code line} as a UnicodeSet.
+     *
+     * @param line the complete set pattern
+     * @param symbolTable symbol table passed to the UnicodeSet parser
+     * @return the parsed set
+     * @throws IllegalArgumentException if text remains after the end of the set
+     */
+    public static UnicodeSet parseUnicodeSet(String line, SymbolTable symbolTable) {
+        final ParsePosition pp = new ParsePosition(0);
+        final UnicodeSet result = new UnicodeSet(line, pp, symbolTable);
+        final int lengthUsed = pp.getIndex();
+        if (lengthUsed != line.length()) {
+            throw new IllegalArgumentException(
+                    "Text after end of set: "
+                            + line.substring(0, lengthUsed)
+                            + "XXX"
+                            + line.substring(lengthUsed));
+        }
+        return result;
+    }
+
+    /** Matcher that accepts only null or empty values; {@code set} ignores its pattern. */
+    static final UnicodeProperty.PatternMatcher NULL_MATCHER =
+            new UnicodeProperty.PatternMatcher() {
+                @Override
+                public boolean test(String o) {
+                    return o == null || "".equals(o);
+                }
+
+                @Override
+                public PatternMatcher set(String pattern) {
+                    return this;
+                }
+            };
+}