Skip to content

Commit

Permalink
Embrace the | since we need it for Unihan.
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Jan 14, 2024
1 parent 535de19 commit 8bce993
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public void TestScxMulti() {
}
assertEquals(
"Expected exception",
"Multivalued property values can't contain commas.",
"Multivalued property values can't contain the delimiter.",
exceptionMessage);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
* @author markdavis
*/
public class IndexUnicodeProperties extends UnicodeProperty.Factory {
static final String SET_SEPARATOR = ",";
static final String SET_SEPARATOR = "|";
/** Control file caching */
static final boolean GZIP = true;

Expand Down Expand Up @@ -681,6 +681,7 @@ class IndexUnicodeProperty extends UnicodeProperty.BaseProperty {
if (PropertyParsingInfo.property2PropertyInfo.get(item).getMultivalued()
!= ValueCardinality.Singleton) {
setMultivalued(true);
setDelimiter(SET_SEPARATOR);
}
}

Expand Down
22 changes: 15 additions & 7 deletions unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@

public abstract class UnicodeProperty extends UnicodeLabel {

private static final Splitter SPLIT_COMMAS = Splitter.on(",");
public static final UnicodeSet NONCHARACTERS =
new UnicodeSet("[:noncharactercodepoint:]").freeze();
public static final UnicodeSet PRIVATE_USE = new UnicodeSet("[:gc=privateuse:]").freeze();
Expand Down Expand Up @@ -157,11 +156,20 @@ public static synchronized void ResetCacheProperties() {

private boolean isMultivalued = false;

/**
 * Separator between the parts of a multivalued property value. Defaults to a comma and is
 * replaced through {@code setDelimiter}.
 */
private String delimiter = ",";
/** Splitter built from {@code delimiter}; {@code setDelimiter} rebuilds it on every change. */
private Splitter delimiterSplitter = Splitter.on(delimiter);

/**
 * Records whether this property is multivalued.
 *
 * @param value the new multivalued flag
 * @return this property, so that calls can be chained
 */
public UnicodeProperty setMultivalued(boolean value) {
    this.isMultivalued = value;
    return this;
}

/**
 * Sets the string that separates the parts of a multivalued property value and rebuilds the
 * matching splitter.
 *
 * <p>The splitter is constructed before either field is assigned, so if {@code Splitter.on}
 * rejects the argument this property keeps its previous delimiter and splitter instead of
 * ending up with a new delimiter paired with a stale splitter.
 *
 * <p>NOTE(review): {@code getFirstValueAlias} passes the delimiter to {@code String.split},
 * which interprets its argument as a regular expression; a delimiter such as {@code "|"} is a
 * regex metacharacter there. Confirm that call site quotes it or uses the splitter instead.
 *
 * @param value the new delimiter
 * @return this property, so that calls can be chained
 */
public UnicodeProperty setDelimiter(String value) {
    // Build first: if this throws, neither field has been touched yet.
    final Splitter splitter = Splitter.on(value);
    delimiter = value;
    delimiterSplitter = splitter;
    return this;
}

/*
* Name: Unicode_1_Name Name: ISO_Comment Name: Name Name: Unicode_1_Name
*
Expand Down Expand Up @@ -336,14 +344,14 @@ public final String getFirstValueAlias(String value) {
if (valueToFirstValueAlias == null) _getFirstValueAliasCache();
if (isMultivalued) {
List<String> result = new ArrayList<>();
for (String part : value.split(",")) {
for (String part : value.split(delimiter)) {
String partAlias = valueToFirstValueAlias.get(part);
if (partAlias == null) {
throw new IllegalArgumentException(value + " is not a value alias for " + name);
}
result.add(partAlias);
}
return String.join(",", result);
return String.join(delimiter, result);
}
String result = valueToFirstValueAlias.get(value);
if (result == null) {
Expand Down Expand Up @@ -407,8 +415,8 @@ public final UnicodeSet getSet(PatternMatcher matcher) {
* the original contents.
*/
public final UnicodeSet getSet(String propertyValue, UnicodeSet result) {
if (isMultivalued && propertyValue.contains(",")) {
throw new IllegalArgumentException("Multivalued property values can't contain commas.");
if (isMultivalued && propertyValue.contains(delimiter)) {
throw new IllegalArgumentException("Multivalued property values can't contain the delimiter.");
} else {
return getSet(
new SimpleMatcher(
Expand Down Expand Up @@ -446,8 +454,8 @@ public UnicodeSet getSet(PatternMatcher matcher, UnicodeSet result) {
valueAliases.clear();
getValueAliases(value, valueAliases);
for (String valueAlias : valueAliases) {
if (isMultivalued && valueAlias.contains(",")) {
for (String part : SPLIT_COMMAS.split(valueAlias)) {
if (isMultivalued && valueAlias.contains(delimiter)) {
for (String part : delimiterSplitter.split(valueAlias)) {
partAliases.clear();
getValueAliases(part, partAliases);
for (String partAlias : partAliases) {
Expand Down

0 comments on commit 8bce993

Please sign in to comment.