diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 69e23177d..485a35d5d 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -586,12 +586,22 @@ Let $nonAlphabeticAvagrahas = [\N{TIBETAN MARK PALUTA}] # A punctuation mark. [\p{InSC=Avagraha} - $nonAlphabeticAvagrahas] ⊆ \p{Alphabetic} # Name-based checks. +Let $nonLowercaseSmallLetters = [ \p{name=/^LIMBU SMALL LETTER/} \N{TURNED GREEK SMALL LETTER IOTA} \p{name=/^(SQUARED|PARENTHESIZED|TAG) LATIN SMALL LETTER/} ] +Let $nonLowercaseSmallModifierLetters = [ \p{gc=Lm} & \p{name=/^ARABIC SMALL/} ] +[ \p{name=/\bSMALL LETTER\b/}-\p{gc=Mn}-\p{gc=Lt} - $nonLowercaseSmallLetters ] ⊆ \p{Lowercase} +[ [\p{gc=Lm} & \p{name=/SMALL/}] - $nonLowercaseSmallModifierLetters ] ⊆ \p{Lowercase} # Combining letters are often alphabetic (medievalist abbreviations). # The others are diacritic (cantillation marks, phonetics). # See 177-C52. \p{name=/COMBINING .* LETTER/} ⊆ [\p{Alphabetic}\p{Diacritic}] +## Consistency of Lowercase with decompositions. +# Note that the same is not true of Uppercase. +# A non-lowercase character has non-lowercase characters in its decomposition, +# or its decomposition type is square (㋍ etc.). +In [\P{Lowercase} - \p{dt=square}], \p{Lowercase} * toNFKD ≠ toNFKD + ## Joining_Type and Joining_Group # Where defined, the Joining_Group refines the Joining_Type. OnPairsOf \P{Joining_Group=No_Joining_Group}, EqualityOf Joining_Group ⇒ EqualityOf Joining_Type