Skip to content

Commit

Permalink
LB=AL for Supplemental Arrows C (#580)
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin authored Nov 4, 2023
1 parent af8fba6 commit c45eca4
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 21 deletions.
12 changes: 2 additions & 10 deletions unicodetools/data/ucd/dev/LineBreak.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# LineBreak-16.0.0.txt
# Date: 2023-11-03, 05:19:42 GMT
# Date: 2023-11-03, 21:22:10 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -3552,20 +3552,12 @@ FFFD ; AI # So REPLACEMENT CHARACTER
1F7F0 ; ID # So HEAVY EQUALS SIGN
1F7F1..1F7FF ; ID # Cn [15] <reserved-1F7F1>..<reserved-1F7FF>
1F800..1F80B ; AL # So [12] LEFTWARDS ARROW WITH SMALL TRIANGLE ARROWHEAD..DOWNWARDS ARROW WITH LARGE TRIANGLE ARROWHEAD
1F80C..1F80F ; ID # Cn [4] <reserved-1F80C>..<reserved-1F80F>
1F810..1F847 ; AL # So [56] LEFTWARDS ARROW WITH SMALL EQUILATERAL ARROWHEAD..DOWNWARDS HEAVY ARROW
1F848..1F84F ; ID # Cn [8] <reserved-1F848>..<reserved-1F84F>
1F850..1F859 ; AL # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
1F85A..1F85F ; ID # Cn [6] <reserved-1F85A>..<reserved-1F85F>
1F860..1F887 ; AL # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
1F888..1F88F ; ID # Cn [8] <reserved-1F888>..<reserved-1F88F>
1F890..1F8AD ; AL # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
1F8AE..1F8AF ; ID # Cn [2] <reserved-1F8AE>..<reserved-1F8AF>
1F8B0..1F8B1 ; ID # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
1F8B2 ; AL # So RIGHTWARDS ARROW WITH LOWER HOOK
1F8B3..1F8BF ; ID # Cn [13] <reserved-1F8B3>..<reserved-1F8BF>
1F8B0..1F8B2 ; AL # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK
1F8C0..1F8C1 ; AL # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW
1F8C2..1F8FF ; ID # Cn [62] <reserved-1F8C2>..<reserved-1F8FF>
1F900..1F90B ; AL # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT
1F90C ; EB # So PINCHED FINGERS
1F90D..1F90E ; ID # So [2] WHITE HEART..BROWN HEART
Expand Down
20 changes: 10 additions & 10 deletions unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# DerivedLineBreak-16.0.0.txt
# Date: 2023-11-03, 21:06:12 GMT
# Date: 2023-11-03, 21:22:07 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -39,11 +39,12 @@
# 1F680..1F6FF Transport_And_Map_Symbols
# 1F700..1F77F Alchemical_Symbols
# 1F780..1F7FF Geometric_Shapes_Extended
# 1F800..1F8FF Supplemental_Arrows_C
# @missing: 1F000..1F7FF; Ideographic

# 1F900..1F9FF Supplemental_Symbols_And_Pictographs
# 1FA00..1FA6F Chess_Symbols
# 1FA70..1FAFF Symbols_And_Pictographs_Extended_A
# @missing: 1F000..1FAFF; Ideographic
# @missing: 1F900..1FAFF; Ideographic

# @missing: 1FC00..1FFFD; Ideographic

Expand All @@ -68,8 +69,8 @@ E000..F8FF ; XX # Co [6400] <private-use-E000>..<private-use-F8FF>
F0000..FFFFD ; XX # Co [65534] <private-use-F0000>..<private-use-FFFFD>
100000..10FFFD; XX # Co [65534] <private-use-100000>..<private-use-10FFFD>

# The above property value applies to 762416 code points not listed here.
# Total code points: 899884
# The above property value applies to 762519 code points not listed here.
# Total code points: 899987

# ================================================

Expand Down Expand Up @@ -1600,14 +1601,14 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE
1F850..1F859 ; AL # So [10] LEFTWARDS SANS-SERIF ARROW..UP DOWN SANS-SERIF ARROW
1F860..1F887 ; AL # So [40] WIDE-HEADED LEFTWARDS LIGHT BARB ARROW..WIDE-HEADED SOUTH WEST VERY HEAVY BARB ARROW
1F890..1F8AD ; AL # So [30] LEFTWARDS TRIANGLE ARROWHEAD..WHITE ARROW SHAFT WIDTH TWO THIRDS
1F8B2 ; AL # So RIGHTWARDS ARROW WITH LOWER HOOK
1F8B0..1F8B2 ; AL # So [3] ARROW POINTING UPWARDS THEN NORTH WEST..RIGHTWARDS ARROW WITH LOWER HOOK
1F8C0..1F8C1 ; AL # So [2] LEFTWARDS ARROW FROM DOWNWARDS ARROW..RIGHTWARDS ARROW FROM DOWNWARDS ARROW
1F900..1F90B ; AL # So [12] CIRCLED CROSS FORMEE WITH FOUR DOTS..DOWNWARD FACING NOTCHED HOOK WITH DOT
1FA00..1FA53 ; AL # So [84] NEUTRAL CHESS KING..BLACK CHESS KNIGHT-BISHOP
1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK
1FB94..1FBCA ; AL # So [55] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..WHITE UP-POINTING CHEVRON

# Total code points: 21967
# Total code points: 21969

# ================================================

Expand Down Expand Up @@ -1816,7 +1817,6 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR
1F7D5..1F7D9 ; ID # So [5] CIRCLED TRIANGLE..NINE POINTED WHITE STAR
1F7E0..1F7EB ; ID # So [12] LARGE ORANGE CIRCLE..LARGE BROWN SQUARE
1F7F0 ; ID # So HEAVY EQUALS SIGN
1F8B0..1F8B1 ; ID # So [2] ARROW POINTING UPWARDS THEN NORTH WEST..ARROW POINTING RIGHTWARDS THEN CURVING SOUTH WEST
1F90D..1F90E ; ID # So [2] WHITE HEART..BROWN HEART
1F910..1F917 ; ID # So [8] ZIPPER-MOUTH FACE..HUGGING FACE
1F920..1F925 ; ID # So [6] FACE WITH COWBOY HAT..LYING FACE
Expand Down Expand Up @@ -1846,8 +1846,8 @@ FFE4 ; ID # So FULLWIDTH BROKEN BAR
30000..3134A ; ID # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; ID # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF

# The above property value applies to 61968 code points not listed here.
# Total code points: 172527
# The above property value applies to 61865 code points not listed here.
# Total code points: 172422

# ================================================

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -250,6 +250,8 @@ UnicodeMap<Line_Break_Values> build() {
addRangeValueIfAtLeast(lb, 0x1F000, 0x1FFFD, 0x90000, ID);
// Unicode 13+: punch a hole
addRangeValueIfAtLeast(lb, 0x1FB00, 0x1FBFF, 0xD0000, XX);
// Unicode 16+: Supplemental Arrows-C to XX.
addRangeValueIfAtLeast(lb, 0x1F800, 0x1F8FF, 0x100000, XX);

return lb;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -544,9 +544,10 @@ Let $nonAlphabeticDependentVowels = [\N{ORIYA SIGN OVERLINE}\N{THAI CHARACTER MA
# LineBreak property
##########################

Let $IDInclusions = [[:block=/Ideographs/:] [[\U00020000-\U0003FFFF][\U0001F000-\U0001FAFF][\U0001FC00-\U0001FFFF]] & [:gc=Cn:] - [:NChar:]]
Let $IDInclusions = [[:block=/Ideographs/:] [[\U00020000-\U0003FFFF][\U0001F000-\U0001FFFF] - [[:block=Symbols for Legacy Computing:][:block=Supplemental Arrows C:]]] & [:gc=Cn:] - [:NChar:]]
# 9.0 Added range 1F000..1FFFD: all undesignated code points in this range are lb=ID (the noncharacters 1FFFE..1FFFF are removed by "- [:NChar:]")
# 13.0 exclude those in 1FB00..1FBFF Symbols for Legacy Computing
# 16.0 exclude those in 1F800..1F8FF Supplemental Arrows-C
\p{LB=ID} ⊃ $IDInclusions
\p{Line_Break=Unknown} = [\p{General_Category=Unassigned} \p{GeneralCategory=PrivateUse} - $IDInclusions - [\u20C0-\u20CF]]

Expand Down

0 comments on commit c45eca4

Please sign in to comment.