diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 30e0c131b..00bd5d8a6 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2023-11-03, 05:19:42 GMT +# Date: 2023-11-03, 21:22:10 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -3552,20 +3552,12 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1F7F0 ; ID # So HEAVY EQUALS SIGN 1F7F1..1F7FF ; ID # Cn [15] .. 1F800..1F80B ; AL # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD -1F80C..1F80F ; ID # Cn [4] .. 1F810..1F847 ; AL # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW -1F848..1F84F ; ID # Cn [8] .. 1F850..1F859 ; AL # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW -1F85A..1F85F ; ID # Cn [6] .. 1F860..1F887 ; AL # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW -1F888..1F88F ; ID # Cn [8] .. 1F890..1F8AD ; AL # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS -1F8AE..1F8AF ; ID # Cn [2] .. -1F8B0..1F8B1 ; ID # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST -1F8B2 ; AL # So RIGHTWARDS ARROW WITH LOWER HOOK -1F8B3..1F8BF ; ID # Cn [13] .. +1F8B0..1F8B2 ; AL # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK 1F8C0..1F8C1 ; AL # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW -1F8C2..1F8FF ; ID # Cn [62] .. 1F900..1F90B ; AL # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT 1F90C ; EB # So PINCHED FINGERS 1F90D..1F90E ; ID # So [2] WHITE HEART..BROWN HEART diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 392801499..226d0f54e 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2023-11-03, 21:06:12 GMT +# Date: 2023-11-03, 21:22:07 GMT # © 2023 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use, see https://www.unicode.org/terms_of_use.html @@ -39,11 +39,12 @@ # 1F680..1F6FF Transport_And_Map_Symbols # 1F700..1F77F Alchemical_Symbols # 1F780..1F7FF Geometric_Shapes_Extended -# 1F800..1F8FF Supplemental_Arrows_C +# @missing: 1F000..1F7FF; Ideographic + # 1F900..1F9FF Supplemental_Symbols_And_Pictographs # 1FA00..1FA6F Chess_Symbols # 1FA70..1FAFF Symbols_And_Pictographs_Extended_A -# @missing: 1F000..1FAFF; Ideographic +# @missing: 1F900..1FAFF; Ideographic # @missing: 1FC00..1FFFD; Ideographic @@ -68,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 762416 code points not listed here. -# Total code points: 899884 +# The above property value applies to 762519 code points not listed here. +# Total code points: 899987 # ================================================ @@ -1600,14 +1601,14 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1F850..1F859 ; AL # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW 1F860..1F887 ; AL # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW 1F890..1F8AD ; AL # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS -1F8B2 ; AL # So RIGHTWARDS ARROW WITH LOWER HOOK +1F8B0..1F8B2 ; AL # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK 1F8C0..1F8C1 ; AL # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW 1F900..1F90B ; AL # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT 1FA00..1FA53 ; AL # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBCA ; AL # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON -# Total code points: 21967 +# Total code points: 21969 # ================================================ @@ -1816,7 +1817,6 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 1F7D5..1F7D9 ; ID # So [5] CIRCLED TRIANGLE..NINE POINTED WHITE STAR 1F7E0..1F7EB ; ID # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE 1F7F0 ; ID # So HEAVY EQUALS SIGN -1F8B0..1F8B1 ; ID # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST 1F90D..1F90E ; ID # So [2] WHITE HEART..BROWN HEART 1F910..1F917 ; ID # So [8] ZIPPER-MOUTH FACE..HUGGING FACE 1F920..1F925 ; ID # So [6] FACE WITH COWBOY HAT..LYING FACE @@ -1846,8 +1846,8 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR 30000..3134A ; ID # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# The above property value applies to 61968 code points not listed here. -# Total code points: 172527 +# The above property value applies to 61865 code points not listed here. +# Total code points: 172422 # ================================================ diff --git a/unicodetools/src/main/java/org/unicode/props/DefaultValues.java b/unicodetools/src/main/java/org/unicode/props/DefaultValues.java index 56038cca1..6a8ec9356 100644 --- a/unicodetools/src/main/java/org/unicode/props/DefaultValues.java +++ b/unicodetools/src/main/java/org/unicode/props/DefaultValues.java @@ -250,6 +250,8 @@ UnicodeMap build() { addRangeValueIfAtLeast(lb, 0x1F000, 0x1FFFD, 0x90000, ID); // Unicode 13+: punch a hole addRangeValueIfAtLeast(lb, 0x1FB00, 0x1FBFF, 0xD0000, XX); + // Unicode 16+: Supplemental Arrows-C to XX. + addRangeValueIfAtLeast(lb, 0x1F800, 0x1F8FF, 0x100000, XX); return lb; } diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 4a336bc89..4f3e7316b 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -544,9 +544,10 @@ Let $nonAlphabeticDependentVowels = [\N{ORIYA SIGN OVERLINE}\N{THAI CHARACTER MA # LineBreak property ########################## -Let $IDInclusions = [[:block=/Ideographs/:] [[\U00020000-\U0003FFFF][\U0001F000-\U0001FAFF][\U0001FC00-\U0001FFFF]] & [:gc=Cn:] - [:NChar:]] +Let $IDInclusions = [[:block=/Ideographs/:] [[\U00020000-\U0003FFFF][\U0001F000-\U0001FFFF] - [[:block=Symbols for Legacy Computing:][:block=Supplemental Arrows C:]]] & [:gc=Cn:] - [:NChar:]] # 9.0 Added range 1F000..1FFFF: all undesignated code points in this range are lb=ID # 13.0 exclude those in 1FB00..1FBFF Symbols for Legacy Computing +# 16.0 exclude Supplemental Arrows C \p{LB=ID} ⊃ $IDInclusions \p{Line_Break=Unknown} = [\p{General_Category=Unassigned} \p{GeneralCategory=PrivateUse} - $IDInclusions - [\u20C0-\u20CF]]