Skip to content

Commit

Permalink
Move VersionedProperty to its own file.
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Jan 13, 2024
1 parent 210204f commit 99edf9f
Show file tree
Hide file tree
Showing 4 changed files with 159 additions and 136 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -777,9 +777,13 @@ private void loadFileData() throws IOException {
final String codelist = pieces[0].trim();
final Reason reasons = Reason.fromString(pieces[1]);
if (pieces[0].startsWith("[")) {
// TODO(macchiati): Weird dependency on ChainedSymbolTable which we probably
// do not need.
sources =
TestUnicodeInvariants.parseUnicodeSet(
codelist); // .retainAll(allocated);
VersionedProperty.parseUnicodeSet(
codelist,
new TestUnicodeInvariants
.ChainedSymbolTable()); // .retainAll(allocated);
} else {
final String[] codes = Utility.split(codelist, ' ');
for (final String code : codes) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -435,9 +435,13 @@ private void loadFileData() throws IOException {
}
final String codelist = pieces[0].trim();
if (UnicodeSet.resemblesPattern(pieces[0], 0)) {
// TODO(macchiati): Weird dependency on ChainedSymbolTable which we probably
// do not need.
sources =
TestUnicodeInvariants.parseUnicodeSet(
codelist); // .retainAll(allocated);
VersionedProperty.parseUnicodeSet(
codelist,
new TestUnicodeInvariants
.ChainedSymbolTable()); // .retainAll(allocated);
if (sources.contains("ᢰ")) {
int x = 0;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.util.Tabber;
Expand All @@ -31,17 +30,14 @@
import org.unicode.props.IndexUnicodeProperties;
import org.unicode.props.UnicodeProperty;
import org.unicode.props.UnicodeProperty.Factory;
import org.unicode.props.UnicodeProperty.PatternMatcher;
import org.unicode.text.utility.Settings;

public class TestUnicodeInvariants {
private static final boolean DEBUG = false;

// private static final Pattern IN_PATTERN = Pattern.compile("(.*)([≠=])(.*)");
private static final boolean ICU_VERSION = false; // ignore the versions if this is true
private static final String LATEST_VERSION = Settings.latestVersion; // "5.2.0"; //
private static final Factory LATEST_PROPS = getProperties(LATEST_VERSION);
private static final String LAST_VERSION = Settings.lastVersion; // "5.1.0"; //
private static final Factory LATEST_PROPS = getProperties(Settings.latestVersion);
private static final boolean SHOW_LOOKUP = false;
private static int showRangeLimit = 20;
static boolean doHtml = true;
Expand Down Expand Up @@ -580,7 +576,7 @@ static UnicodeProperty of(
if (propName.length() > 0) {
final FilterOrProp propOrFilter = new FilterOrProp();
final VersionedProperty xprop = new VersionedProperty().set(propName);
propOrFilter.prop = xprop.property;
propOrFilter.prop = xprop.getProperty();
if (propOrFilter.prop == null) {
throw new IllegalArgumentException(
"Can't create property for: " + propName);
Expand Down Expand Up @@ -1217,10 +1213,6 @@ private static Factory getProperties(final String version) {
return ICU_VERSION ? ICUPropertyFactory.make() : ToolUnicodePropertySource.make(version);
}

private static Factory getIndexedProperties(String version2) {
return IndexUnicodeProperties.make(version2);
}

static class ChainedSymbolTable extends UnicodeSet.XSymbolTable {

private static final Comparator<String> LONGEST_FIRST =
Expand Down Expand Up @@ -1289,133 +1281,15 @@ public String parseReference(String text, ParsePosition pos, int limit) {
public boolean applyPropertyAlias(
String propertyName2, String propertyValue, UnicodeSet result) {
result.clear();
result.addAll(propertyVersion.set(propertyName2).getSet(propertyValue));
result.addAll(
propertyVersion
.set(propertyName2)
.getSet(propertyValue, symbolTable, symbolTable.variables));
return true;
}
}

static class VersionedProperty {
private String propertyName;
private String version;
private UnicodeProperty.Factory propSource;
private UnicodeProperty property;
private final transient PatternMatcher matcher = new UnicodeProperty.RegexMatcher();

private static final Set<String> TOOL_ONLY_PROPERTIES =
Set.of("toNFC", "toNFD", "toNFKC", "toNFKD");

private static boolean isTrivial(UnicodeMap<String> map) {
return map.isEmpty()
|| (map.values().size() == 1
&& map.getSet(map.values().iterator().next())
.equals(UnicodeSet.ALL_CODE_POINTS));
}

public VersionedProperty set(String xPropertyName) {
xPropertyName = xPropertyName.trim();
boolean allowRetroactive = false;
if (xPropertyName.contains(":")) {
final String[] names = xPropertyName.split(":");
if (names.length != 2) {
throw new IllegalArgumentException("Too many ':' fields in " + xPropertyName);
}
if (names[0].isEmpty()) {
throw new IllegalArgumentException("Empty version field in " + xPropertyName);
}
switch (names[0].charAt(0)) {
case 'U':
break;
case 'R':
allowRetroactive = true;
break;
default:
throw new IllegalArgumentException(
"Version field should start with U or R in " + xPropertyName);
}
if (names[0].substring(1).equals("-1")) {
version = LAST_VERSION;
} else {
version = names[0].substring(1);
}
xPropertyName = names[1];
} else {
version = LATEST_VERSION;
}
;
propertyName = xPropertyName;
propSource = getIndexedProperties(version);
property = propSource.getProperty(xPropertyName);
if ((property == null && TOOL_ONLY_PROPERTIES.contains(xPropertyName))
|| (isTrivial(property.getUnicodeMap()) && allowRetroactive)) {
propSource = getProperties(version);
property = propSource.getProperty(xPropertyName);
}
if (property == null || isTrivial(property.getUnicodeMap())) {
throw new IllegalArgumentException(
"Can't create property from name: "
+ propertyName
+ " and version: "
+ version);
}
return this;
}

public UnicodeSet getSet(String propertyValue) {
UnicodeSet set;
if (propertyValue.length() == 0) {
set = property.getSet("true");
} else if (propertyValue.startsWith("/") && propertyValue.endsWith("/")) {
String body = propertyValue.substring(1, propertyValue.length() - 1);
for (final String variableMinus : symbolTable.variables.keySet()) {
final String variable = "$" + variableMinus;
if (body.contains(variable)) {
final String replacement =
String.copyValueOf(symbolTable.variables.get(variableMinus));
final UnicodeSet value = parseUnicodeSet(replacement);
final String valueString =
value.complement(0).complement(0).toPattern(false);
body = body.replace(variable, valueString);
}
}
matcher.set(body);
set = property.getSet(matcher);
} else if (propertyValue.equals("∅")) {
set = property.getSet(NULL_MATCHER, null);
} else {
set = property.getSet(propertyValue);
}
return set;
}
}

static final UnicodeProperty.PatternMatcher NULL_MATCHER =
new UnicodeProperty.PatternMatcher() {
@Override
public boolean test(String o) {
return o == null || "".equals(o);
}

@Override
public PatternMatcher set(String pattern) {
return this;
}
};

public static UnicodeSet parseUnicodeSet(String line, ParsePosition pp) {
return new UnicodeSet(line, pp, symbolTable);
}

public static UnicodeSet parseUnicodeSet(String line) {
final ParsePosition pp = new ParsePosition(0);
final UnicodeSet result = new UnicodeSet(line, pp, symbolTable);
final int lengthUsed = pp.getIndex();
if (lengthUsed != line.length()) {
throw new IllegalArgumentException(
"Text after end of set: "
+ line.substring(0, lengthUsed)
+ "XXX"
+ line.substring(lengthUsed));
}
return result;
}
}
141 changes: 141 additions & 0 deletions unicodetools/src/main/java/org/unicode/text/UCD/VersionedProperty.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
package org.unicode.text.UCD;

import com.ibm.icu.dev.util.UnicodeMap;
import com.ibm.icu.text.SymbolTable;
import com.ibm.icu.text.UnicodeSet;
import java.text.ParsePosition;
import java.util.Map;
import java.util.Set;
import org.unicode.props.IndexUnicodeProperties;
import org.unicode.props.UnicodeProperty;
import org.unicode.props.UnicodeProperty.Factory;
import org.unicode.props.UnicodeProperty.PatternMatcher;
import org.unicode.text.utility.Settings;

/**
 * Resolves a property name, optionally qualified by a Unicode version, to a {@link
 * UnicodeProperty}, and computes the {@link UnicodeSet} selected by a property value expression.
 *
 * <p>An instance is mutable: each call to {@link #set(String)} replaces the property it refers to.
 */
public class VersionedProperty {
    private String propertyName;
    private String version;
    private UnicodeProperty.Factory propSource;
    private UnicodeProperty property;
    private final transient PatternMatcher matcher = new UnicodeProperty.RegexMatcher();

    /** Properties that may be looked up in the tool property source when the index lacks them. */
    private static final Set<String> TOOL_ONLY_PROPERTIES =
            Set.of("toNFC", "toNFD", "toNFKC", "toNFKD");

    /**
     * Matches a missing value: accepts {@code null} and the empty string, and ignores any pattern
     * passed to {@code set}.
     */
    static final UnicodeProperty.PatternMatcher NULL_MATCHER =
            new UnicodeProperty.PatternMatcher() {
                @Override
                public boolean test(String o) {
                    return o == null || "".equals(o);
                }

                @Override
                public PatternMatcher set(String pattern) {
                    return this;
                }
            };

    /**
     * Returns whether {@code map} carries no information: it is empty, or it has a single value
     * whose set equals {@link UnicodeSet#ALL_CODE_POINTS}.
     */
    private static boolean isTrivial(UnicodeMap<String> map) {
        return map.isEmpty()
                || (map.values().size() == 1
                        && map.getSet(map.values().iterator().next())
                                .equals(UnicodeSet.ALL_CODE_POINTS));
    }

    /**
     * Returns the property selected by the last successful call to {@link #set(String)}, or {@code
     * null} if {@code set} has not been called yet.
     */
    public UnicodeProperty getProperty() {
        return property;
    }

    /**
     * Selects the property designated by {@code xPropertyName}.
     *
     * <p>The name is trimmed and then has one of two forms:
     *
     * <ul>
     *   <li>{@code name}: the property is taken from {@code Settings.latestVersion};
     *   <li>{@code Xversion:name}, where {@code X} is {@code U} or {@code R}: the property is taken
     *       from {@code version}, with {@code -1} standing for {@code Settings.lastVersion}. With
     *       {@code R}, a property that is trivial in the indexed properties is looked up again in
     *       {@link ToolUnicodePropertySource}.
     * </ul>
     *
     * <p>Independently of the prefix, a name in {@code TOOL_ONLY_PROPERTIES} that the indexed
     * properties do not provide is looked up in {@link ToolUnicodePropertySource}.
     *
     * @param xPropertyName the property name, optionally prefixed by a version field
     * @return this object
     * @throws IllegalArgumentException if the version field is malformed, or if no non-trivial
     *     property with that name can be found for that version
     */
    public VersionedProperty set(String xPropertyName) {
        xPropertyName = xPropertyName.trim();
        boolean allowRetroactive = false;
        if (xPropertyName.contains(":")) {
            final String[] names = xPropertyName.split(":");
            if (names.length != 2) {
                throw new IllegalArgumentException("Too many ':' fields in " + xPropertyName);
            }
            if (names[0].isEmpty()) {
                throw new IllegalArgumentException("Empty version field in " + xPropertyName);
            }
            switch (names[0].charAt(0)) {
                case 'U':
                    break;
                case 'R':
                    allowRetroactive = true;
                    break;
                default:
                    throw new IllegalArgumentException(
                            "Version field should start with U or R in " + xPropertyName);
            }
            final String requestedVersion = names[0].substring(1);
            version = requestedVersion.equals("-1") ? Settings.lastVersion : requestedVersion;
            xPropertyName = names[1];
        } else {
            version = Settings.latestVersion;
        }
        propertyName = xPropertyName;
        propSource = getIndexedProperties(version);
        property = propSource.getProperty(xPropertyName);
        // Decide the fallback without dereferencing a missing property: an unknown name that is
        // not tool-only must reach the IllegalArgumentException below rather than fail with a
        // NullPointerException.
        final boolean useToolSource =
                property == null
                        ? TOOL_ONLY_PROPERTIES.contains(xPropertyName)
                        : allowRetroactive && isTrivial(property.getUnicodeMap());
        if (useToolSource) {
            propSource = ToolUnicodePropertySource.make(version);
            property = propSource.getProperty(xPropertyName);
        }
        if (property == null || isTrivial(property.getUnicodeMap())) {
            throw new IllegalArgumentException(
                    "Can't create property from name: "
                            + propertyName
                            + " and version: "
                            + version);
        }
        return this;
    }

    /**
     * Returns the set of the current property for a value expression.
     *
     * <ul>
     *   <li>An empty {@code propertyValue} selects the value {@code "true"}.
     *   <li>{@code /body/} selects the values accepted by the regular expression matcher for
     *       {@code body}. Before matching, each occurrence of {@code $name} in {@code body}, for
     *       every {@code name} in {@code variables}, is replaced by the pattern of the set that
     *       the variable's text parses to.
     *   <li>{@code ∅} selects the values accepted by {@link #NULL_MATCHER}.
     *   <li>Anything else is used as the property value itself.
     * </ul>
     *
     * @param propertyValue the value expression
     * @param symbolTable the symbol table used to parse the text of substituted variables
     * @param variables the variable texts, keyed by name without the leading {@code $}
     * @return the matching set
     * @throws IllegalArgumentException if the text of a substituted variable is followed by
     *     characters that are not part of the set
     */
    public UnicodeSet getSet(
            String propertyValue, SymbolTable symbolTable, Map<String, char[]> variables) {
        UnicodeSet set;
        if (propertyValue.length() == 0) {
            set = property.getSet("true");
        } else if (propertyValue.startsWith("/") && propertyValue.endsWith("/")) {
            String body = propertyValue.substring(1, propertyValue.length() - 1);
            for (final String variableMinus : variables.keySet()) {
                final String variable = "$" + variableMinus;
                if (body.contains(variable)) {
                    final String replacement = String.copyValueOf(variables.get(variableMinus));
                    final UnicodeSet value = parseUnicodeSet(replacement, symbolTable);
                    // NOTE(review): the double complement leaves the contents unchanged;
                    // presumably it makes toPattern regenerate the pattern instead of echoing
                    // the parsed text. Confirm against ICU.
                    final String valueString = value.complement(0).complement(0).toPattern(false);
                    body = body.replace(variable, valueString);
                }
            }
            matcher.set(body);
            set = property.getSet(matcher);
        } else if (propertyValue.equals("∅")) {
            set = property.getSet(NULL_MATCHER, null);
        } else {
            set = property.getSet(propertyValue);
        }
        return set;
    }

    /** Returns the indexed property factory for {@code version2}. */
    private static Factory getIndexedProperties(String version2) {
        return IndexUnicodeProperties.make(version2);
    }

    /**
     * Parses all of {@code line} as a {@link UnicodeSet}, resolving references through {@code
     * symbolTable}.
     *
     * @param line the set pattern
     * @param symbolTable the symbol table passed to the {@link UnicodeSet} parser
     * @return the parsed set
     * @throws IllegalArgumentException if the parser stops before the end of {@code line}; the
     *     message marks the stopping point with {@code XXX}
     */
    public static UnicodeSet parseUnicodeSet(String line, SymbolTable symbolTable) {
        final ParsePosition pp = new ParsePosition(0);
        final UnicodeSet result = new UnicodeSet(line, pp, symbolTable);
        final int lengthUsed = pp.getIndex();
        if (lengthUsed != line.length()) {
            throw new IllegalArgumentException(
                    "Text after end of set: "
                            + line.substring(0, lengthUsed)
                            + "XXX"
                            + line.substring(lengthUsed));
        }
        return result;
    }
}

0 comments on commit 99edf9f

Please sign in to comment.