Skip to content

Commit

Permalink
Fix the handling of multivalued Unihan properties (#652)
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin authored Jan 18, 2024
1 parent b07cf2e commit 77e3ce6
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 15 deletions.
23 changes: 15 additions & 8 deletions unicodetools/src/main/java/org/unicode/props/UnicodeProperty.java
Original file line number Diff line number Diff line change
Expand Up @@ -286,14 +286,21 @@ public List<String> getValueAliases(String valueAlias, List<String> result) {
if (result == null) result = new ArrayList<>(1);
result = _getValueAliases(valueAlias, result);
if (!result.contains(valueAlias)) { // FIX && type < NUMERIC
result = _getValueAliases(valueAlias, result); // for debugging
throw new IllegalArgumentException(
"Internal error: "
+ getName()
+ " doesn't contain "
+ valueAlias
+ ": "
+ new BagFormatter().join(result));
if (type == MISC) {
// Unihan has multivalued properties but does not use aliases.
result.add(valueAlias);
} else {
result = _getValueAliases(valueAlias, result); // for debugging
throw new IllegalArgumentException(
"Internal error: "
+ getName()
+ " ("
+ getTypeName()
+ ") doesn't contain "
+ valueAlias
+ ": "
+ new BagFormatter().join(result));
}
}
return result;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -892,14 +892,13 @@ Let $ideohack = [〆 〇 〡-〩]
# 10.0 added Nushu
# 13.0 added Khitan_Small_Script

# The following don't work yet. They are tested independently in TestInvariants.
# \p{Unified_Ideograph} ⊇ \P{kRSUnicode=∅}
\p{Unified_Ideograph} ⊂ \p{kRSUnicode=/./}
\p{kRSUnicode=/./} = [\p{Block=/^CJK.(Unified|Compatibility).Ideographs/} - \p{gc=Cn}]
\p{kRSUnicode=/./} = \p{kTotalStrokes=/./}

# \p{Unified_Ideograph} = \P{kRSUnicode=∅}

# \P{kRSUnicode=∅} = \P{kTotalStrokes=∅}

# \P{kHanyuPinyin=∅} ⊇ \P{kMandarin=∅}
# TODO(eggrobin): Should those two have a kMandarin, or is this not actually an invariant?
# TODO(macchiati): The kHanyuPinyin UnicodeSet is excruciatingly slow.
# \p{kHanyuPinyin=/./} - \p{kMandarin=/./} = [\x{228F5}\x{2574C}]

# InPC-InSC-gc invariants
# See https://www.unicode.org/L2/L2023/23200-category-invariants.pdf.
Expand Down

0 comments on commit 77e3ce6

Please sign in to comment.