From cea2f4dc69b145c03f67dc72693033afd61dd93e Mon Sep 17 00:00:00 2001 From: Roozbeh Pournader Date: Mon, 13 Nov 2023 15:05:13 -0800 Subject: [PATCH] Add new InSC property value Reordering_Killer and move Batak viramas to it (#607) [177-C42] Consensus: Add a new property value Reordering_Killer to IndicSyllabicCategory.txt for Unicode version 16.0. (Ref. L2/23-205 and Section 18 of L2/23-238) [177-C43] Consensus: Change the InSC property values for the Batak viramas U+1BF2 BATAK PANGOLAT and U+1BF3 BATAK PANONGONAN from Pure_Killer to Reordering_Killer for Unicode version 16.0. (Ref. L2/23-205 and Section 18 of L2/23-238) [177-A96] Action Item for Roozbeh Pournader, SAH: Add a new property value Reordering_Killer to the Indic Syllabic Category (InSC) property; and change the InSC property values for U+1BF2 BATAK PANGOLAT and U+1BF3 BATAK PANONGONAN from Pure_Killer to Reordering_Killer. [177-A97] Action Item for Roozbeh Pournader, SAH: Consider the suggestions provided at the end of L2/23-205 for potential updates to the descriptions of the existing InSC property values (Ref. L2/23-205 and Section 18 of L2/23-238) [177-A98] Action Item for Markus Scherer, PAG: Add Reordering_Killer to the property value aliases for InSC in PropertyValueAliases.txt, for Unicode version 16.0. 
--- .../data/ucd/dev/IndicSyllabicCategory.txt | 32 +++++++++++++------ .../data/ucd/dev/PropertyValueAliases.txt | 1 + .../org/unicode/props/UcdPropertyValues.java | 1 + .../org/unicode/text/UCD/MakeUnicodeFiles.txt | 23 +++++++++---- .../unicode/text/UCD/UnicodeInvariantTest.txt | 5 +-- 5 files changed, 44 insertions(+), 18 deletions(-) diff --git a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt index 25450c0a6..f678e630f 100644 --- a/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt +++ b/unicodetools/data/ucd/dev/IndicSyllabicCategory.txt @@ -1,5 +1,5 @@ # IndicSyllabicCategory-16.0.0.txt -# Date: 2023-11-10, 22:06:18 GMT +# Date: 2023-11-13, 19:36:00 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -262,14 +262,15 @@ A9B3 ; Nukta # Mn JAVANESE SIGN CECAK TELU # Indic_Syllabic_Category=Virama -# Virama (killing of inherent vowel in consonant sequence -# or consonant stacker) +# Virama (kills inherent vowel of consonant; may act as a Pure_Killer +# or Invisible_Stacker depending on context) # Only includes characters that can act both as visible killer viramas # and consonant stackers. Separate property values exist for characters -# that can only act as pure killers or only as consonant stackers. +# that can only act as pure killers, only as reordering killers, or only +# as consonant stackers. 
# [Derivation: (ccc=9) - (InSC=Pure_Killer) - (InSC=Invisible_Stacker) -# - (InSC=Number_Joiner) - 2D7F] +# - (InSC=Reordering_Killer) - (InSC=Number_Joiner) - 2D7F] 094D ; Virama # Mn DEVANAGARI SIGN VIRAMA 09CD ; Virama # Mn BENGALI SIGN VIRAMA @@ -303,8 +304,9 @@ A9C0 ; Virama # Mc JAVANESE PANGKON # Indic_Syllabic_Category=Pure_Killer -# Pure killer (killing of inherent vowel in consonant sequence, -# with no consonant stacking behavior) +# Pure killer (kills inherent vowel of consonant; always visible; +# has no conjunct formation, consonant stacking, or reordering +# behavior) # [Not derivable] @@ -320,7 +322,6 @@ A9C0 ; Virama # Mc JAVANESE PANGKON 17D1 ; Pure_Killer # Mn KHMER SIGN VIRIAM 1A7A ; Pure_Killer # Mn TAI THAM SIGN RA HAAM 1BAA ; Pure_Killer # Mc SUNDANESE SIGN PAMAAEH -1BF2..1BF3 ; Pure_Killer # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN A82C ; Pure_Killer # Mn SYLOTI NAGRI SIGN ALTERNATE HASANTA A953 ; Pure_Killer # Mc REJANG VIRAMA ABED ; Pure_Killer # Mn MEETEI MAYEK APUN IYEK @@ -339,9 +340,22 @@ ABED ; Pure_Killer # Mn MEETEI MAYEK APUN IYEK # ================================================ +# Indic_Syllabic_Category=Reordering_Killer + +# Reordering killer (kills inherent vowel of consonant; always visible; +# may cause consonant reordering) + +# [Not derivable] + +1BF2..1BF3 ; Reordering_Killer # Mc [2] BATAK PANGOLAT..BATAK PANONGONAN + +# ================================================ + # Indic_Syllabic_Category=Invisible_Stacker -# Invisible stacker (invisible consonant stacker virama). 
+# Invisible stacker (usually kills inherent vowel of consonant; is not visible +# by itself; causes conjunct formation or consonant +# stacking) # # Note that in some scripts, such as Kharoshthi and Masaram Gondi, an invisible # stacker may have a second function, changing the shape and/or location of the diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 3d9b1a0d1..397599719 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -919,6 +919,7 @@ InSC; Number_Joiner ; Number_Joiner InSC; Other ; Other InSC; Pure_Killer ; Pure_Killer InSC; Register_Shifter ; Register_Shifter +InSC; Reordering_Killer ; Reordering_Killer InSC; Syllable_Modifier ; Syllable_Modifier InSC; Tone_Letter ; Tone_Letter InSC; Tone_Mark ; Tone_Mark diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 321cb20b1..64f818680 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -1096,6 +1096,7 @@ public enum Indic_Syllabic_Category_Values implements Named { Other("Other"), Pure_Killer("Pure_Killer"), Register_Shifter("Register_Shifter"), + Reordering_Killer("Reordering_Killer"), Syllable_Modifier("Syllable_Modifier"), Tone_Letter("Tone_Letter"), Tone_Mark("Tone_Mark"), diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt index 710d63d79..34fc8f228 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/MakeUnicodeFiles.txt @@ -1119,21 +1119,30 @@ Value: Nukta # [Not derivable] Value: Virama -# Virama (killing of inherent vowel in consonant sequence -# or 
consonant stacker) +# Virama (kills inherent vowel of consonant; may act as a Pure_Killer +# or Invisible_Stacker depending on context) # Only includes characters that can act both as visible killer viramas # and consonant stackers. Separate property values exist for characters -# that can only act as pure killers or only as consonant stackers. +# that can only act as pure killers, only as reordering killers, or only +# as consonant stackers. # [Derivation: (ccc=9) - (InSC=Pure_Killer) - (InSC=Invisible_Stacker) -# - (InSC=Number_Joiner) - 2D7F] +# - (InSC=Reordering_Killer) - (InSC=Number_Joiner) - 2D7F] Value: Pure_Killer -# Pure killer (killing of inherent vowel in consonant sequence, -# with no consonant stacking behavior) +# Pure killer (kills inherent vowel of consonant; always visible; +# has no conjunct formation, consonant stacking, or reordering +# behavior) + +# [Not derivable] +Value: Reordering_Killer +# Reordering killer (kills inherent vowel of consonant; always visible; +# may cause consonant reordering) # [Not derivable] Value: Invisible_Stacker -# Invisible stacker (invisible consonant stacker virama). 
+# Invisible stacker (usually kills inherent vowel of consonant; is not visible +# by itself; causes conjunct formation or consonant +# stacking) # # Note that in some scripts, such as Kharoshthi and Masaram Gondi, an invisible # stacker may have a second function, changing the shape and/or location of the diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index e02375280..9fa0ca3a0 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -571,8 +571,9 @@ Let $OPInclusions = [\u00A1\u00BF\u2E18\U00013258-\U0001325A\U00013286\U00013288 [\p{LB=CL}\p{LB=CP}] ⊃ [\p{GC=Pe} - [\u2309\u230B]] # See L2/22-086 for an explanation of the special case of Batak. +# (Note that Consensus 177-C43 changed the Batak viramas from InSC=Pure_Killer to Reordering_Killer.) \p{LB=VI} = [[\p{Indic_Syllabic_Category=Virama}\p{Indic_Syllabic_Category=Invisible_Stacker}] & $BrahmicLineBreaking] -\p{LB=VF} = [\p{Indic_Syllabic_Category=Pure_Killer} & $VFScripts] +\p{LB=VF} = [\p{Indic_Syllabic_Category=Reordering_Killer} & $VFScripts] # 15.1: Action item UTC-176-A81: change [[:PCM:]-\u070F] lb=AL->NU \p{LB=CM} = [[\u3035] \p{GC=Mn} \p{GC=Me} \p{GC=Mc} \p{GC=Cc} \p{GC=Cf} -[\U00013437\U00013438\U0001343C-\U0001343F] -\p{LB=SA} -\p{LB=WJ} -\p{LB=ZW} -\p{LB=BA} -\p{LB=LF} -\p{LB=BK} -\p{LB=CR} -\p{LB=NL} -\p{LB=GL} -\p{LB=AL} -\p{LB=ZWJ} - \p{LB=VI} - \p{LB=VF} - \p{LB=NU}] @@ -906,4 +907,4 @@ Let $ideohack = [〆 〇 〡-〩] [\P{InPC=NA}&\p{gc=Mc}] ⊆ \p{InPC=/(Left|Right)/} [\P{InPC=NA}&\P{InPC=/(Left|Right)/}] ⊆ [\p{gc=Mn}\p{gc=Lo}] \p{gc=Mn} ⊆ \P{InPC=/(Left|Right)/} -[\p{gc=Lo}&\P{InPC=NA}&\P{InPC=/(Left|Right)/}] ⊆ [\p{InSC=Consonant_Preceding_Repha}\p{InSC=Consonant_Prefixed}] \ No newline at end of file +[\p{gc=Lo}&\P{InPC=NA}&\P{InPC=/(Left|Right)/}] ⊆ 
[\p{InSC=Consonant_Preceding_Repha}\p{InSC=Consonant_Prefixed}]