Skip to content

Commit

Permalink
Merge remote-tracking branch 'la-vache/main' into 173-C26
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Oct 23, 2023
2 parents fb0ea31 + 6280089 commit d70a830
Show file tree
Hide file tree
Showing 161 changed files with 721 additions and 244 deletions.
2 changes: 1 addition & 1 deletion UnicodeJsps/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ FROM alpine as cbuild
WORKDIR /build
RUN apk add --update wget make gcc musl-dev
ARG CPATH=https://www.unicode.org/Public/PROGRAMS/BidiReferenceC/
ARG CVERSION=15.0.0
ARG CVERSION=15.1.0
RUN wget -np -nv --reject-regex='.*\.(lib|exe)$' --cut-dirs=4 -nH -r ${CPATH}${CVERSION}/
RUN cd source && gcc -I ../include/ -static -Os -o3 -o bidiref1 bidiref1.c brutils.c brtest.c brtable.c brrule.c
RUN ls -lh /build/source/bidiref1 && (/build/source/bidiref1 || true)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# IdentifierStatus.txt
# Date: 2023-05-16, 22:25:15 GMT
# Date: 2023-08-11, 17:46:41 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -582,8 +582,8 @@ FA27..FA29 ; Allowed # 1.1 [3] CJK COMPATIBILITY IDEOGRAPH-FA27..CJK CO
2B740..2B81D ; Allowed # 6.0 [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Allowed # 8.0 [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Allowed # 10.0 [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2EBF0..2EE4A ; Allowed # 15.1 [603] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE4A
2EBF0..2EE5D ; Allowed # 15.1 [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
30000..3134A ; Allowed # 13.0 [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; Allowed # 15.0 [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF

# Total code points: 112759
# Total code points: 112778
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# IdentifierType.txt
# Date: 2023-05-16, 22:25:14 GMT
# Date: 2023-08-11, 17:46:40 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -576,11 +576,11 @@ FA27..FA29 ; Recommended # 1.1 [3] CJK COMPATIBILITY ID
2B740..2B81D ; Recommended # 6.0 [222] CJK UNIFIED IDEOGRAPH-2B740..CJK UNIFIED IDEOGRAPH-2B81D
2B820..2CEA1 ; Recommended # 8.0 [5762] CJK UNIFIED IDEOGRAPH-2B820..CJK UNIFIED IDEOGRAPH-2CEA1
2CEB0..2EBE0 ; Recommended # 10.0 [7473] CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2EBF0..2EE4A ; Recommended # 15.1 [603] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE4A
2EBF0..2EE5D ; Recommended # 15.1 [622] CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
30000..3134A ; Recommended # 13.0 [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A
31350..323AF ; Recommended # 15.0 [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF

# Total code points: 112742
# Total code points: 112761

# Identifier_Type: Inclusion

Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# IdnaMappingTable.txt
# Date: 2023-05-15, 22:37:02 GMT
# Date: 2023-08-10, 22:32:27 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -2036,7 +2036,7 @@
1E9A ; mapped ; 0061 02BE # 1.1 LATIN SMALL LETTER A WITH RIGHT HALF RING
1E9B ; mapped ; 1E61 # 2.0 LATIN SMALL LETTER LONG S WITH DOT ABOVE
1E9C..1E9D ; valid # 5.1 LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE..LATIN SMALL LETTER LONG S WITH HIGH STROKE
1E9E ; mapped ; 0073 0073 # 5.1 LATIN CAPITAL LETTER SHARP S
1E9E ; mapped ; 00DF # 5.1 LATIN CAPITAL LETTER SHARP S
1E9F ; valid # 5.1 LATIN SMALL LETTER DELTA
1EA0 ; mapped ; 1EA1 # 1.1 LATIN CAPITAL LETTER A WITH DOT BELOW
1EA1 ; valid # 1.1 LATIN SMALL LETTER A WITH DOT BELOW
Expand Down Expand Up @@ -3422,7 +3422,7 @@
31C0..31CF ; valid ; ; NV8 # 4.1 CJK STROKE T..CJK STROKE N
31D0..31E3 ; valid ; ; NV8 # 5.1 CJK STROKE H..CJK STROKE Q
31E4..31EE ; disallowed # NA <reserved-31E4>..<reserved-31EE>
31EF ; valid ; ; NV8 # 15.1 IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
31EF ; disallowed # 15.1 IDEOGRAPHIC DESCRIPTION CHARACTER SUBTRACTION
31F0..31FF ; valid # 3.2 KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
3200 ; disallowed_STD3_mapped ; 0028 1100 0029 #1.1 PARENTHESIZED HANGUL KIYEOK
3201 ; disallowed_STD3_mapped ; 0028 1102 0029 #1.1 PARENTHESIZED HANGUL NIEUN
Expand Down Expand Up @@ -8448,8 +8448,8 @@ FFFE..FFFF ; disallowed # 1.1 <noncharacter-FFFE
2CEA2..2CEAF ; disallowed # NA <reserved-2CEA2>..<reserved-2CEAF>
2CEB0..2EBE0 ; valid # 10.0 CJK UNIFIED IDEOGRAPH-2CEB0..CJK UNIFIED IDEOGRAPH-2EBE0
2EBE1..2EBEF ; disallowed # NA <reserved-2EBE1>..<reserved-2EBEF>
2EBF0..2EE4A ; valid # 15.1 CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE4A
2EE4B..2F7FF ; disallowed # NA <reserved-2EE4B>..<reserved-2F7FF>
2EBF0..2EE5D ; valid # 15.1 CJK UNIFIED IDEOGRAPH-2EBF0..CJK UNIFIED IDEOGRAPH-2EE5D
2EE5E..2F7FF ; disallowed # NA <reserved-2EE5E>..<reserved-2F7FF>
2F800 ; mapped ; 4E3D # 3.1 CJK COMPATIBILITY IDEOGRAPH-2F800
2F801 ; mapped ; 4E38 # 3.1 CJK COMPATIBILITY IDEOGRAPH-2F801
2F802 ; mapped ; 4E41 # 3.1 CJK COMPATIBILITY IDEOGRAPH-2F802
Expand Down
22 changes: 12 additions & 10 deletions UnicodeJsps/src/main/resources/org/unicode/jsp/NamesList.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,7 @@
; charset=UTF-8
@@@ The Unicode Standard 15.1.0
@@@+ U15M230512.lst
Unicode 15.1.0 names list, seventh delta.
Repertoire synched with UnicodeData-15.1.0d2.txt.
Tweak use of notices to suppress year expansions.
Update annotations for 06F4..06F7.
@@@+ U15M230728.lst
Unicode 15.1.0 final names list.
This file is semi-automatically derived from UnicodeData.txt and
a set of manually created annotations using a script to select
or suppress information from the data file. The rules used
Expand Down Expand Up @@ -2429,7 +2426,8 @@
= apostrophe
* glottal stop, glottalization, ejective
* many languages use this as a letter of their alphabets
* used as a tone marker in Bodo, Dogri, and Maithili
* used as a tone marker in Bodo and Dogri
* indicates vowel elongation, or various truncations and ellipsis in Maithili
* used as a modifier letter in the Lisu script
* 2019 is the preferred character for a punctuation apostrophe
x (apostrophe - 0027)
Expand Down Expand Up @@ -4456,7 +4454,8 @@
* Uyghur, Kazakh
06CC ARABIC LETTER FARSI YEH
* Arabic, Persian, Urdu, Kashmiri, ...
* initial and medial forms of this letter have dots
* initial and medial forms of this letter have two horizontal dots below
* retains its dots in initial and medial forms when used in combination with 0654
x (arabic letter alef maksura - 0649)
x (arabic letter yeh - 064A)
06CD ARABIC LETTER YEH WITH TAIL
Expand Down Expand Up @@ -19522,8 +19521,10 @@
* indicates pronunciation on one side of the mouth only
x (section sign - 00A7)
@ Dashes
@+ These long dashes are shown in the code charts inside dashed square boxes because of their width. In production fonts they would simply display as extra-wide dashes.
2E3A TWO-EM DASH
= omission dash
* may be used in Chinese for abrupt change of thought, inserting new content, or continuation of tone or sound
x (em dash - 2014)
2E3B THREE-EM DASH
@ Alternate forms of punctuation
Expand Down Expand Up @@ -26618,7 +26619,7 @@ D7FB HANGUL JONGSEONG PHIEUPH-THIEUTH
@@+
@+ This block, despite its name, contains a number of unified CJK ideographs. Each is also individually identified by an annotation.
@+ Subheaders identifying sources for subranges do not indicate required usage or preclude mappings to other sources. For example, many pronunciation variants from KS X 1001:1998 are also mapped to a J source.
@ Pronunciation variants from KS X 1001:1998
@ Pronunciation variants from KS X 1001:1998
F900 CJK COMPATIBILITY IDEOGRAPH-F900
: 8C48
F901 CJK COMPATIBILITY IDEOGRAPH-F901
Expand Down Expand Up @@ -44024,6 +44025,7 @@ FFFF <not a character>
1BC01 DUPLOYAN LETTER X
* Salishan
@ Line consonants
@+ Small arrows shown in the chart glyphs for some strokes and arcs indicate the handwriting direction of these characters, and are not visibly rendered.
1BC02 DUPLOYAN LETTER P
* Chinook number 1
1BC03 DUPLOYAN LETTER T
Expand Down Expand Up @@ -54200,9 +54202,9 @@ FFFF <not a character>
@@ 2B740 CJK Unified Ideographs Extension D 2B81D
@@ 2B820 CJK Unified Ideographs Extension E 2CEA1
@@ 2CEB0 CJK Unified Ideographs Extension F 2EBE0
@@ 2EBF0 CJK Unified Ideographs Extension I 2EE4A
@@ 2EBF0 CJK Unified Ideographs Extension I 2EE5D
@@ 2F800 CJK Compatibility Ideographs Supplement 2FA1F
@ Duplicate characters from CNS 11643-1992
@ Duplicate characters from CNS 11643-1992
2F800 CJK COMPATIBILITY IDEOGRAPH-2F800
: 4E3D
2F801 CJK COMPATIBILITY IDEOGRAPH-2F801
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# confusables.txt
# Date: 2023-05-16, 22:25:14 GMT
# Date: 2023-08-11, 17:46:40 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -349,8 +349,8 @@ A4FA ; 002E 002E ; MA # ( ꓺ → .. ) LISU LETTER TONE MYA CYA → FULL STOP, F

A6F4 ; A6F3 A6F3 ; MA #* ( ꛴ → ꛳꛳ ) BAMUM COLON → BAMUM FULL STOP, BAMUM FULL STOP #

30FB ; 00B7 ; MA #* ( ・ → · ) KATAKANA MIDDLE DOT → MIDDLE DOT # →•→
FF65 ; 00B7 ; MA #* ( ・ → · ) HALFWIDTH KATAKANA MIDDLE DOT → MIDDLE DOT # →•→
30FB ; 00B7 ; MA # ( ・ → · ) KATAKANA MIDDLE DOT → MIDDLE DOT # →•→
FF65 ; 00B7 ; MA # ( ・ → · ) HALFWIDTH KATAKANA MIDDLE DOT → MIDDLE DOT # →•→
16EB ; 00B7 ; MA #* ( ᛫ → · ) RUNIC SINGLE PUNCTUATION → MIDDLE DOT #
0387 ; 00B7 ; MA # ( · → · ) GREEK ANO TELEIA → MIDDLE DOT #
2E31 ; 00B7 ; MA #* ( ⸱ → · ) WORD SEPARATOR MIDDLE DOT → MIDDLE DOT #
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# PropertyAliases-15.1.0.txt
# Date: 2023-03-23, 00:36:58 GMT
# Date: 2023-08-07, 15:21:34 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -122,6 +122,7 @@ ea ; East_Asian_Width
gc ; General_Category
GCB ; Grapheme_Cluster_Break
hst ; Hangul_Syllable_Type
InCB ; Indic_Conjunct_Break
InPC ; Indic_Positional_Category
InSC ; Indic_Syllabic_Category
jg ; Joining_Group
Expand Down Expand Up @@ -211,6 +212,6 @@ XO_NFKC ; Expands_On_NFKC
XO_NFKD ; Expands_On_NFKD

# ================================================
# Total: 133
# Total: 134

# EOF
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# PropertyValueAliases-15.1.0.txt
# Date: 2023-05-10, 16:59:10 GMT
# PropertyValueAliases-16.0.0.txt
# Date: 2023-10-17, 12:29:15 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -92,6 +92,7 @@ age; 13.0 ; V13_0
age; 14.0 ; V14_0
age; 15.0 ; V15_0
age; 15.1 ; V15_1
age; 16.0 ; V16_0
age; NA ; Unassigned

# Alphabetic (Alpha)
Expand Down Expand Up @@ -367,6 +368,7 @@ blk; Music ; Musical_Symbols
blk; Myanmar ; Myanmar
blk; Myanmar_Ext_A ; Myanmar_Extended_A
blk; Myanmar_Ext_B ; Myanmar_Extended_B
blk; Myanmar_Ext_C ; Myanmar_Extended_C
blk; Nabataean ; Nabataean
blk; Nag_Mundari ; Nag_Mundari
blk; Nandinagari ; Nandinagari
Expand Down Expand Up @@ -425,6 +427,7 @@ blk; Soyombo ; Soyombo
blk; Specials ; Specials
blk; Sundanese ; Sundanese
blk; Sundanese_Sup ; Sundanese_Supplement
blk; Sunuwar ; Sunuwar
blk; Sup_Arrows_A ; Supplemental_Arrows_A
blk; Sup_Arrows_B ; Supplemental_Arrows_B
blk; Sup_Arrows_C ; Supplemental_Arrows_C
Expand Down Expand Up @@ -460,6 +463,7 @@ blk; Thai ; Thai
blk; Tibetan ; Tibetan
blk; Tifinagh ; Tifinagh
blk; Tirhuta ; Tirhuta
blk; Todhri ; Todhri
blk; Toto ; Toto
blk; Transport_And_Map ; Transport_And_Map_Symbols
blk; UCAS ; Unified_Canadian_Aboriginal_Syllabics; Canadian_Syllabics
Expand Down Expand Up @@ -853,6 +857,13 @@ IDS; Y ; Yes ; T
Ideo; N ; No ; F ; False
Ideo; Y ; Yes ; T ; True

# Indic_Conjunct_Break (InCB)

InCB; Consonant ; Consonant
InCB; Extend ; Extend
InCB; Linker ; Linker
InCB; None ; None

# Indic_Positional_Category (InPC)

InPC; Bottom ; Bottom
Expand Down Expand Up @@ -1416,6 +1427,7 @@ sc ; Sogo ; Old_Sogdian
sc ; Sora ; Sora_Sompeng
sc ; Soyo ; Soyombo
sc ; Sund ; Sundanese
sc ; Sunu ; Sunuwar
sc ; Sylo ; Syloti_Nagri
sc ; Syrc ; Syriac
sc ; Tagb ; Tagbanwa
Expand All @@ -1433,6 +1445,7 @@ sc ; Thai ; Thai
sc ; Tibt ; Tibetan
sc ; Tirh ; Tirhuta
sc ; Tnsa ; Tangsa
sc ; Todr ; Todhri
sc ; Toto ; Toto
sc ; Ugar ; Ugaritic
sc ; Vaii ; Vai
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# emoji-sequences.txt
# Date: 2023-05-05, 23:24:34 GMT
# Date: 2023-06-05, 21:39:54 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
Expand Down Expand Up @@ -784,7 +784,7 @@
1F1F9 1F1F2 ; RGI_Emoji_Flag_Sequence ; flag: Turkmenistan # E2.0 [1] (🇹🇲)
1F1F9 1F1F3 ; RGI_Emoji_Flag_Sequence ; flag: Tunisia # E2.0 [1] (🇹🇳)
1F1F9 1F1F4 ; RGI_Emoji_Flag_Sequence ; flag: Tonga # E2.0 [1] (🇹🇴)
1F1F9 1F1F7 ; RGI_Emoji_Flag_Sequence ; flag: Turkey # E2.0 [1] (🇹🇷)
1F1F9 1F1F7 ; RGI_Emoji_Flag_Sequence ; flag: Türkiye # E2.0 [1] (🇹🇷)
1F1F9 1F1F9 ; RGI_Emoji_Flag_Sequence ; flag: Trinidad & Tobago # E2.0 [1] (🇹🇹)
1F1F9 1F1FB ; RGI_Emoji_Flag_Sequence ; flag: Tuvalu # E2.0 [1] (🇹🇻)
1F1F9 1F1FC ; RGI_Emoji_Flag_Sequence ; flag: Taiwan # E2.0 [1] (🇹🇼)
Expand Down
Loading

0 comments on commit d70a830

Please sign in to comment.