Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix the IndexUnicodeProperties Joining_Type #657

Merged
merged 5 commits into from
Jan 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions unicodetools/src/main/java/org/unicode/props/UcdProperty.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
import org.unicode.props.UcdPropertyValues.NFKC_Quick_Check_Values;
import org.unicode.props.UcdPropertyValues.NFKD_Quick_Check_Values;
import org.unicode.props.UcdPropertyValues.Numeric_Type_Values;
import org.unicode.props.UcdPropertyValues.Other_Joining_Type_Values;
import org.unicode.props.UcdPropertyValues.Script_Values;
import org.unicode.props.UcdPropertyValues.Sentence_Break_Values;
import org.unicode.props.UcdPropertyValues.Vertical_Orientation_Values;
Expand Down Expand Up @@ -241,6 +242,8 @@ public enum UcdProperty {
NFKC_Quick_Check(PropertyType.Enumerated, NFKC_Quick_Check_Values.class, null, "NFKC_QC"),
NFKD_Quick_Check(PropertyType.Enumerated, NFKD_Quick_Check_Values.class, null, "NFKD_QC"),
Numeric_Type(PropertyType.Enumerated, Numeric_Type_Values.class, null, "nt"),
Other_Joining_Type(
PropertyType.Enumerated, Other_Joining_Type_Values.class, null, "Other_Joining_Type"),
Sentence_Break(PropertyType.Enumerated, Sentence_Break_Values.class, null, "SB"),
Vertical_Orientation(PropertyType.Enumerated, Vertical_Orientation_Values.class, null, "vo"),
Word_Break(PropertyType.Enumerated, Word_Break_Values.class, null, "WB"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1708,6 +1708,40 @@ public static Numeric_Type_Values forName(String name) {
}

// Numeric_Value
/**
 * Values of the unofficial {@code Other_Joining_Type} property.
 *
 * <p>Each constant is constructed with its short name; the long name is the constant's own
 * identifier. {@code Deduce_From_General_Category} uses the same string for both.
 */
public enum Other_Joining_Type_Values implements Named {
    Join_Causing("C"),
    Dual_Joining("D"),
    Left_Joining("L"),
    Right_Joining("R"),
    Transparent("T"),
    Non_Joining("U"),
    Deduce_From_General_Category("Deduce_From_General_Category");

    /** Matcher built by {@code PropertyNames.getNameToEnums} for this enum; backs {@link #forName}. */
    private static final NameMatcher<Other_Joining_Type_Values> BY_NAME =
            PropertyNames.getNameToEnums(Other_Joining_Type_Values.class);

    /** The name record (short name plus any further names) attached to this value. */
    private final PropertyNames<Other_Joining_Type_Values> names;

    /**
     * Creates a value and its associated name record.
     *
     * @param abbreviation the short name of the value
     * @param aliases any additional names of the value
     */
    Other_Joining_Type_Values(String abbreviation, String... aliases) {
        this.names =
                new PropertyNames<>(Other_Joining_Type_Values.class, this, abbreviation, aliases);
    }

    /**
     * Looks a value up through the name matcher.
     *
     * @param name the name to look up
     * @return the result of the matcher's {@code get(name)} (behaviour for unknown names is
     *     defined by {@code NameMatcher}; not visible here)
     */
    public static Other_Joining_Type_Values forName(String name) {
        return BY_NAME.get(name);
    }

    /** Returns the name record created for this value at construction time. */
    @Override
    public PropertyNames<Other_Joining_Type_Values> getNames() {
        return names;
    }

    /** Returns the short name, as reported by this value's name record. */
    @Override
    public String getShortName() {
        return names.getShortName();
    }
}

public enum Script_Values implements Named {
Adlam("Adlm"),
Caucasian_Albanian("Aghb"),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@ idtype ; Identifier_Type
idns ; Idn_Status
idn8 ; Idn_2008

# Unofficial contributory property used in the derivation of Joining_Type.
Other_Joining_Type ; Other_Joining_Type

# ================================================
# String Properties
# ================================================
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@
# @missing: 0000..10FFFF; kTraditionalVariant ; <none>

# @missing: 0000..10FFFF; Joining_Group ; No_Joining_Group
# @missing: 0000..10FFFF; Joining_Type ; Non_Joining

# Overrides for bugs

Expand Down Expand Up @@ -124,7 +123,6 @@ idn8 ; na ; na

# @missing: 0000..10FFFF; Idn_Mapping ; <code point>


# @missing: 0000..10FFFF; Identifier_Status ; r

idstatus ; r ; Restricted
Expand Down Expand Up @@ -162,3 +160,12 @@ sc ; Zxxx ; Unwritten

# TODO: there is no Unicode 13.1, see https://github.com/unicode-org/unicodetools/issues/100
age; 13.1 ; V13_1

# @missing: 0000..10FFFF; Other_Joining_Type ; Deduce_From_General_Category
Other_Joining_Type ; C ; Join_Causing
Other_Joining_Type ; D ; Dual_Joining
Other_Joining_Type ; L ; Left_Joining
Other_Joining_Type ; R ; Right_Joining
Other_Joining_Type ; T ; Transparent
Other_Joining_Type ; U ; Non_Joining
Other_Joining_Type ; Deduce_From_General_Category ; Deduce_From_General_Category
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,10 @@ UnicodeData; Simple_Lowercase_Mapping ; 13
UnicodeData; Simple_Titlecase_Mapping ; 14
UnicodeData; Unicode_1_Name ; 10
UnicodeData; ISO_Comment ; 11
ArabicShaping; Joining_Type; 2
# Handle the complex default of ArabicShaping.txt by introducing an unofficial
# contributory property, to be used when deriving Joining_Type.
ArabicShaping; Other_Joining_Type; 2
DerivedJoiningType; Joining_Type; 1
ArabicShaping; Joining_Group; 3
BidiMirroring; Bidi_Mirroring_Glyph;
Blocks ; Block
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,17 @@ Let $nonAlphabeticDependentVowels = [\N{ORIYA SIGN OVERLINE}\N{THAI CHARACTER MA
Let $nonDiacriticNuktas = [\u1BE6\U00010A38\U00010A39\U00010A3A\U0001133B]
[\p{InSc=Nukta} - \p{Diacritic}] = $nonDiacriticNuktas

## Joining_Type and Joining_Group
# Where defined, the Joining_Group refines the Joining_Type.
EquivalencesOf \P{Joining_Group=No_Joining_Group} Joining_Group ⇒ Joining_Type
\p{gc=Mn} ⊆ \p{Joining_Type=Transparent}
\p{gc=Me} ⊆ \p{Joining_Type=Transparent}

# Derivation of Joining_Type from field 2 of ArabicShaping.txt (unofficially Other_Joining_Type).
In \P{Other_Joining_Type=Deduce_From_General_Category} Joining_Type = Other_Joining_Type
[ \p{Other_Joining_Type=Deduce_From_General_Category} & [\p{gc=Mn}\p{gc=Me}\p{gc=Cf}] ] ⊆ \p{Joining_Type=Transparent}
[ \p{Other_Joining_Type=Deduce_From_General_Category} - [\p{gc=Mn}\p{gc=Me}\p{gc=Cf}] ] ⊆ \p{Joining_Type=Non_Joining}

##########################
# LineBreak property
##########################
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -214,35 +214,6 @@ public void TestAAScripts() {
}
}

/**
 * Checks that all characters sharing a Joining_Group (other than No_Joining_Group) also share
 * one Joining_Type, comparing every member of a group against the group's first code point.
 */
@Test
public void TestJoiningGroupConsistency() {
    // TODO(egg): I would like to be able to put that in the invariants tests as « the partition
    // defined by Joining_Group is finer than that defined by Joining_Type ».
    UnicodeMap<String> joiningGroup = iup.load(UcdProperty.Joining_Group);
    UnicodeMap<String> joiningType = iup.load(UcdProperty.Joining_Type);

    // Invert the Joining_Group map (group name -> characters), then drop the default group,
    // whose members are not expected to agree on a joining type.
    var charactersByJoiningGroup = new HashMap<String, UnicodeSet>();
    joiningGroup.addInverseTo(charactersByJoiningGroup);
    charactersByJoiningGroup.remove("No_Joining_Group");

    for (var entry : charactersByJoiningGroup.entrySet()) {
        final String group = entry.getKey();
        final UnicodeSet members = entry.getValue();

        // The first code point of the group serves as the reference for the whole group.
        final int reference = members.getRangeStart(0);
        final String expectedType = joiningType.get(reference);

        for (var member : members) {
            final String message =
                    "U+"
                            + getCodeAndName(Character.toString(reference))
                            + "\nand\nU+"
                            + getCodeAndName(member)
                            + "\nhave different joining types but are in the"
                            + " same joining group ("
                            + group
                            + ")\n";
            assertEquals(message, expectedType, joiningType.get(member));
        }
    }
}

@Test
public void TestScripts() {

Expand Down
Loading