Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Move VersionedProperty to its own file #647

Merged
merged 1 commit into from
Jan 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -777,9 +777,13 @@ private void loadFileData() throws IOException {
final String codelist = pieces[0].trim();
final Reason reasons = Reason.fromString(pieces[1]);
if (pieces[0].startsWith("[")) {
// TODO(macchiati): Weird dependency on ChainedSymbolTable which we probably
// do not need.
sources =
TestUnicodeInvariants.parseUnicodeSet(
codelist); // .retainAll(allocated);
VersionedProperty.parseUnicodeSet(
codelist,
new TestUnicodeInvariants
.ChainedSymbolTable()); // .retainAll(allocated);
} else {
final String[] codes = Utility.split(codelist, ' ');
for (final String code : codes) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -435,9 +435,13 @@ private void loadFileData() throws IOException {
}
final String codelist = pieces[0].trim();
if (UnicodeSet.resemblesPattern(pieces[0], 0)) {
// TODO(macchiati): Weird dependency on ChainedSymbolTable which we probably
// do not need.
sources =
TestUnicodeInvariants.parseUnicodeSet(
codelist); // .retainAll(allocated);
VersionedProperty.parseUnicodeSet(
codelist,
new TestUnicodeInvariants
.ChainedSymbolTable()); // .retainAll(allocated);
if (sources.contains("ᢰ")) {
int x = 0;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.unicode.cldr.draft.FileUtilities;
import org.unicode.cldr.util.Tabber;
Expand All @@ -31,17 +30,14 @@
import org.unicode.props.IndexUnicodeProperties;
import org.unicode.props.UnicodeProperty;
import org.unicode.props.UnicodeProperty.Factory;
import org.unicode.props.UnicodeProperty.PatternMatcher;
import org.unicode.text.utility.Settings;

public class TestUnicodeInvariants {
private static final boolean DEBUG = false;

// private static final Pattern IN_PATTERN = Pattern.compile("(.*)([≠=])(.*)");
private static final boolean ICU_VERSION = false; // ignore the versions if this is true
private static final String LATEST_VERSION = Settings.latestVersion; // "5.2.0"; //
private static final Factory LATEST_PROPS = getProperties(LATEST_VERSION);
private static final String LAST_VERSION = Settings.lastVersion; // "5.1.0"; //
private static final Factory LATEST_PROPS = getProperties(Settings.latestVersion);
private static final boolean SHOW_LOOKUP = false;
private static int showRangeLimit = 20;
static boolean doHtml = true;
Expand Down Expand Up @@ -580,7 +576,7 @@ static UnicodeProperty of(
if (propName.length() > 0) {
final FilterOrProp propOrFilter = new FilterOrProp();
final VersionedProperty xprop = new VersionedProperty().set(propName);
propOrFilter.prop = xprop.property;
propOrFilter.prop = xprop.getProperty();
if (propOrFilter.prop == null) {
throw new IllegalArgumentException(
"Can't create property for: " + propName);
Expand Down Expand Up @@ -1217,10 +1213,6 @@ private static Factory getProperties(final String version) {
return ICU_VERSION ? ICUPropertyFactory.make() : ToolUnicodePropertySource.make(version);
}

private static Factory getIndexedProperties(String version2) {
return IndexUnicodeProperties.make(version2);
}

static class ChainedSymbolTable extends UnicodeSet.XSymbolTable {

private static final Comparator<String> LONGEST_FIRST =
Expand Down Expand Up @@ -1289,133 +1281,15 @@ public String parseReference(String text, ParsePosition pos, int limit) {
public boolean applyPropertyAlias(
String propertyName2, String propertyValue, UnicodeSet result) {
result.clear();
result.addAll(propertyVersion.set(propertyName2).getSet(propertyValue));
result.addAll(
propertyVersion
.set(propertyName2)
.getSet(propertyValue, symbolTable, symbolTable.variables));
return true;
}
}

static class VersionedProperty {
private String propertyName;
private String version;
private UnicodeProperty.Factory propSource;
private UnicodeProperty property;
private final transient PatternMatcher matcher = new UnicodeProperty.RegexMatcher();

private static final Set<String> TOOL_ONLY_PROPERTIES =
Set.of("toNFC", "toNFD", "toNFKC", "toNFKD");

private static boolean isTrivial(UnicodeMap<String> map) {
return map.isEmpty()
|| (map.values().size() == 1
&& map.getSet(map.values().iterator().next())
.equals(UnicodeSet.ALL_CODE_POINTS));
}

public VersionedProperty set(String xPropertyName) {
xPropertyName = xPropertyName.trim();
boolean allowRetroactive = false;
if (xPropertyName.contains(":")) {
final String[] names = xPropertyName.split(":");
if (names.length != 2) {
throw new IllegalArgumentException("Too many ':' fields in " + xPropertyName);
}
if (names[0].isEmpty()) {
throw new IllegalArgumentException("Empty version field in " + xPropertyName);
}
switch (names[0].charAt(0)) {
case 'U':
break;
case 'R':
allowRetroactive = true;
break;
default:
throw new IllegalArgumentException(
"Version field should start with U or R in " + xPropertyName);
}
if (names[0].substring(1).equals("-1")) {
version = LAST_VERSION;
} else {
version = names[0].substring(1);
}
xPropertyName = names[1];
} else {
version = LATEST_VERSION;
}
;
propertyName = xPropertyName;
propSource = getIndexedProperties(version);
property = propSource.getProperty(xPropertyName);
if ((property == null && TOOL_ONLY_PROPERTIES.contains(xPropertyName))
|| (isTrivial(property.getUnicodeMap()) && allowRetroactive)) {
propSource = getProperties(version);
property = propSource.getProperty(xPropertyName);
}
if (property == null || isTrivial(property.getUnicodeMap())) {
throw new IllegalArgumentException(
"Can't create property from name: "
+ propertyName
+ " and version: "
+ version);
}
return this;
}

public UnicodeSet getSet(String propertyValue) {
UnicodeSet set;
if (propertyValue.length() == 0) {
set = property.getSet("true");
} else if (propertyValue.startsWith("/") && propertyValue.endsWith("/")) {
String body = propertyValue.substring(1, propertyValue.length() - 1);
for (final String variableMinus : symbolTable.variables.keySet()) {
final String variable = "$" + variableMinus;
if (body.contains(variable)) {
final String replacement =
String.copyValueOf(symbolTable.variables.get(variableMinus));
final UnicodeSet value = parseUnicodeSet(replacement);
final String valueString =
value.complement(0).complement(0).toPattern(false);
body = body.replace(variable, valueString);
}
}
matcher.set(body);
set = property.getSet(matcher);
} else if (propertyValue.equals("∅")) {
set = property.getSet(NULL_MATCHER, null);
} else {
set = property.getSet(propertyValue);
}
return set;
}
}

static final UnicodeProperty.PatternMatcher NULL_MATCHER =
new UnicodeProperty.PatternMatcher() {
@Override
public boolean test(String o) {
return o == null || "".equals(o);
}

@Override
public PatternMatcher set(String pattern) {
return this;
}
};

public static UnicodeSet parseUnicodeSet(String line, ParsePosition pp) {
return new UnicodeSet(line, pp, symbolTable);
}

public static UnicodeSet parseUnicodeSet(String line) {
final ParsePosition pp = new ParsePosition(0);
final UnicodeSet result = new UnicodeSet(line, pp, symbolTable);
final int lengthUsed = pp.getIndex();
if (lengthUsed != line.length()) {
throw new IllegalArgumentException(
"Text after end of set: "
+ line.substring(0, lengthUsed)
+ "XXX"
+ line.substring(lengthUsed));
}
return result;
}
}
141 changes: 141 additions & 0 deletions unicodetools/src/main/java/org/unicode/text/UCD/VersionedProperty.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
package org.unicode.text.UCD;

import com.ibm.icu.dev.util.UnicodeMap;
import com.ibm.icu.text.SymbolTable;
import com.ibm.icu.text.UnicodeSet;
import java.text.ParsePosition;
import java.util.Map;
import java.util.Set;
import org.unicode.props.IndexUnicodeProperties;
import org.unicode.props.UnicodeProperty;
import org.unicode.props.UnicodeProperty.Factory;
import org.unicode.props.UnicodeProperty.PatternMatcher;
import org.unicode.text.utility.Settings;

public class VersionedProperty {
// Property name from the most recent set() call, with any "<U|R><version>:" prefix removed;
// reported in set()'s failure message.
private String propertyName;
// Version chosen by set(): the explicit version from the prefix, Settings.lastVersion for "-1",
// or Settings.latestVersion when the name carries no prefix.
private String version;
// Factory in which set() most recently looked the property up.
private UnicodeProperty.Factory propSource;
// Property resolved by set(); returned by getProperty() and queried by getSet().
private UnicodeProperty property;
// Matcher instance that getSet() re-targets (via matcher.set) for every /regex/ value.
private final transient PatternMatcher matcher = new UnicodeProperty.RegexMatcher();

private static final Set<String> TOOL_ONLY_PROPERTIES =
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

BTW, just checked in changes to a PR so that the normalization code can use the IndexUnicodeProperties.

Set.of("toNFC", "toNFD", "toNFKC", "toNFKD");

/**
 * Reports whether a property's value map carries no information: it is either empty, or it
 * has exactly one distinct value and that value is assigned to every code point.
 *
 * @param map the value map to inspect
 * @return {@code true} if the map is empty or maps all code points to a single value
 */
private static boolean isTrivial(UnicodeMap<String> map) {
    if (map.isEmpty()) {
        return true;
    }
    if (map.values().size() != 1) {
        return false;
    }
    // Exactly one distinct value: trivial only when it covers the whole code point range.
    final String onlyValue = map.values().iterator().next();
    return map.getSet(onlyValue).equals(UnicodeSet.ALL_CODE_POINTS);
}

/**
 * Returns the property stored by the most recent call to {@code set}.
 *
 * @return the current property; {@code null} if nothing has been assigned to it yet
 */
public UnicodeProperty getProperty() {
    return this.property;
}

/**
 * Resolves the property named by {@code xPropertyName} and stores it in this object.
 *
 * <p>The name may carry a version prefix separated by a colon, {@code U<version>:<name>} or
 * {@code R<version>:<name>}. A version of {@code -1} selects {@code Settings.lastVersion};
 * a name without a prefix uses {@code Settings.latestVersion}. The property is first looked
 * up through {@code IndexUnicodeProperties}. {@code ToolUnicodePropertySource} is consulted
 * instead when that lookup finds nothing for one of the {@code TOOL_ONLY_PROPERTIES}, or when
 * the prefix letter is {@code R} and the indexed property is trivial (see {@code isTrivial}).
 *
 * <p>The fields of this object are overwritten as resolution proceeds, including on the paths
 * that end in an exception.
 *
 * @param xPropertyName property name, optionally prefixed as described above; leading and
 *     trailing whitespace is ignored
 * @return this object, to allow chaining
 * @throws IllegalArgumentException if the prefix is malformed, or if no non-trivial property
 *     can be found for the given name and version
 */
public VersionedProperty set(String xPropertyName) {
    xPropertyName = xPropertyName.trim();
    boolean allowRetroactive = false;
    if (xPropertyName.contains(":")) {
        final String[] names = xPropertyName.split(":");
        if (names.length != 2) {
            throw new IllegalArgumentException("Too many ':' fields in " + xPropertyName);
        }
        if (names[0].isEmpty()) {
            throw new IllegalArgumentException("Empty version field in " + xPropertyName);
        }
        switch (names[0].charAt(0)) {
            case 'U':
                break;
            case 'R':
                allowRetroactive = true;
                break;
            default:
                throw new IllegalArgumentException(
                        "Version field should start with U or R in " + xPropertyName);
        }
        // Everything after the U/R marker is the version; "-1" is shorthand for the
        // previous release.
        final String requestedVersion = names[0].substring(1);
        version = requestedVersion.equals("-1") ? Settings.lastVersion : requestedVersion;
        xPropertyName = names[1];
    } else {
        version = Settings.latestVersion;
    }
    propertyName = xPropertyName;
    propSource = getIndexedProperties(version);
    property = propSource.getProperty(xPropertyName);
    // Decide whether to retry with the tool property source. The null case must be handled
    // separately: dereferencing a missing property here used to raise NullPointerException
    // for unknown names instead of reaching the IllegalArgumentException below.
    final boolean useToolSource =
            property == null
                    ? TOOL_ONLY_PROPERTIES.contains(xPropertyName)
                    : allowRetroactive && isTrivial(property.getUnicodeMap());
    if (useToolSource) {
        propSource = ToolUnicodePropertySource.make(version);
        property = propSource.getProperty(xPropertyName);
    }
    if (property == null || isTrivial(property.getUnicodeMap())) {
        throw new IllegalArgumentException(
                "Can't create property from name: "
                        + propertyName
                        + " and version: "
                        + version);
    }
    return this;
}

/**
 * Returns the set of code points whose value for the current property matches
 * {@code propertyValue}.
 *
 * <p>The value is interpreted as follows:
 *
 * <ul>
 *   <li>empty string: equivalent to the value {@code "true"};
 *   <li>{@code /body/}: {@code body} is handed to this object's regex matcher after every
 *       occurrence of {@code $name} (for each {@code name} in {@code variables}) has been
 *       replaced by the pattern of the UnicodeSet that the variable's text parses to;
 *   <li>{@code "∅"}: matches values that are {@code null} or empty;
 *   <li>anything else: passed to the property as a literal value. This includes a lone
 *       {@code "/"}, which is too short to delimit a regex.
 * </ul>
 *
 * @param propertyValue the value expression to match
 * @param symbolTable symbol table used when parsing variable contents as UnicodeSets
 * @param variables variable names (without the leading {@code $}) mapped to their text
 * @return the matching set, as produced by the property
 * @throws IllegalArgumentException if a substituted variable does not parse as exactly one
 *     UnicodeSet (raised by {@code parseUnicodeSet})
 */
public UnicodeSet getSet(
        String propertyValue, SymbolTable symbolTable, Map<String, char[]> variables) {
    if (propertyValue.isEmpty()) {
        return property.getSet("true");
    }
    // Require both delimiters to be distinct characters: a bare "/" satisfies startsWith and
    // endsWith at once and previously made substring(1, 0) throw
    // StringIndexOutOfBoundsException.
    final boolean isRegex =
            propertyValue.length() >= 2
                    && propertyValue.startsWith("/")
                    && propertyValue.endsWith("/");
    if (isRegex) {
        String body = propertyValue.substring(1, propertyValue.length() - 1);
        for (final Map.Entry<String, char[]> entry : variables.entrySet()) {
            final String variable = "$" + entry.getKey();
            if (body.contains(variable)) {
                final String replacement = String.copyValueOf(entry.getValue());
                final UnicodeSet value = parseUnicodeSet(replacement, symbolTable);
                // NOTE(review): the double complement leaves the contents unchanged;
                // presumably it makes toPattern regenerate the pattern from the contents
                // rather than echo the source text - confirm against ICU.
                final String valueString = value.complement(0).complement(0).toPattern(false);
                body = body.replace(variable, valueString);
            }
        }
        matcher.set(body);
        return property.getSet(matcher);
    }
    if (propertyValue.equals("∅")) {
        return property.getSet(NULL_MATCHER, null);
    }
    return property.getSet(propertyValue);
}

/**
 * Obtains the {@code IndexUnicodeProperties} property factory for a version.
 *
 * @param version2 version string passed through to {@code IndexUnicodeProperties.make}
 * @return the factory returned by {@code IndexUnicodeProperties.make}
 */
private static Factory getIndexedProperties(String version2) {
    final Factory indexed = IndexUnicodeProperties.make(version2);
    return indexed;
}

/**
 * Parses {@code line} as a single UnicodeSet pattern that must span the entire string.
 *
 * @param line the pattern text
 * @param symbolTable symbol table handed to the UnicodeSet parser
 * @return the parsed set
 * @throws IllegalArgumentException if parsing stops before the end of {@code line}; the
 *     message shows the input with {@code XXX} inserted where parsing stopped
 */
public static UnicodeSet parseUnicodeSet(String line, SymbolTable symbolTable) {
    final ParsePosition position = new ParsePosition(0);
    final UnicodeSet parsed = new UnicodeSet(line, position, symbolTable);
    final int consumed = position.getIndex();
    if (consumed == line.length()) {
        return parsed;
    }
    final String accepted = line.substring(0, consumed);
    final String leftover = line.substring(consumed);
    throw new IllegalArgumentException(
            "Text after end of set: " + accepted + "XXX" + leftover);
}

/**
 * Matcher that accepts only absent values, i.e. {@code null} or the empty string. It has no
 * pattern of its own, so {@code set} ignores its argument.
 */
static final UnicodeProperty.PatternMatcher NULL_MATCHER =
        new UnicodeProperty.PatternMatcher() {
            /** Returns {@code true} when {@code o} is {@code null} or has length zero. */
            @Override
            public boolean test(String o) {
                if (o == null) {
                    return true;
                }
                return o.isEmpty();
            }

            /** Ignores {@code pattern} and returns this matcher unchanged. */
            @Override
            public PatternMatcher set(String pattern) {
                return this;
            }
        };
}
Loading