Skip to content

Commit

Permalink
Revert UCA changes for now
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Oct 24, 2023
1 parent aa727d6 commit a1c9c55
Show file tree
Hide file tree
Showing 3 changed files with 30,874 additions and 31,024 deletions.
97 changes: 6 additions & 91 deletions c/uca/sifter/unidata.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# unidata-16.0.0.txt
# Date: 2023-10-06
# unidata-15.1.0.txt
# Date: 2023-07-28, 00:00:00 GMT [KW]
# © 2023 Unicode®, Inc.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
Expand All @@ -9,6 +9,10 @@
# Default Unicode Collation Element Table (DUCET) for
# the Unicode Collation Algorithm.
#
# Version 15.1.0 draft 5 (Unicode Version: 15.1.0)
# based on Unicode data file UnicodeData-15.1.0d3.txt
# Ordering for Unicode 15.1
#
# Fields:
# Unicode;Name;Category;Decomposition;Num value;Comment;Uppercase;Lowercase;Titlecase

Expand Down Expand Up @@ -2207,8 +2211,6 @@ ABEB;MEETEI MAYEK CHEIKHEI;Po;;;;;;
11F44;KAWI DOUBLE DANDA;Po;;;;;;
16A6E;MRO DANDA;Po;;;;;;
16A6F;MRO DOUBLE DANDA;Po;;;;;;
16D6E;KIRAT RAI DANDA;Po;;;;;;
16D6F;KIRAT RAI DOUBLE DANDA;Po;;;;;;
1C7E;OL CHIKI PUNCTUATION MUCAAD;Po;;;;;;
1C7F;OL CHIKI PUNCTUATION DOUBLE MUCAAD;Po;;;;;;

Expand Down Expand Up @@ -2924,13 +2926,9 @@ AA5C;CHAM PUNCTUATION SPIRAL;Po;;;;;;
115D5;SIDDHAM SECTION MARK WITH CIRCLES AND RAYS;Po;;;;;;
115D6;SIDDHAM SECTION MARK WITH CIRCLES AND TWO ENCLOSURES;Po;;;;;;
115D7;SIDDHAM SECTION MARK WITH CIRCLES AND FOUR ENCLOSURES;Po;;;;;;

11643;MODI ABBREVIATION SIGN;Po;;;;;;
116B9;TAKRI ABBREVIATION SIGN;Po;;;;;;
1183B;DOGRA ABBREVIATION SIGN;Po;;;;;;
# L2/22-043R: Sign Yupi is used to make abbreviations
16D6D;KIRAT RAI SIGN YUPI;Po;;;;;;

11945;DIVES AKURU GAP FILLER;Po;;;;;;
119E2;NANDINAGARI SIGN SIDDHAM;Po;;;;;;
11FFF;TAMIL PUNCTUATION END OF TEXT;Po;;;;;;
Expand Down Expand Up @@ -11679,7 +11677,6 @@ A620;VAI DIGIT ZERO;Nd;;0;;;;
16A60;MRO DIGIT ZERO;Nd;;0;;;;
16AC0;TANGSA DIGIT ZERO;Nd;;0;;;;
16B50;PAHAWH HMONG DIGIT ZERO;Nd;;0;;;;
16D70;KIRAT RAI DIGIT ZERO;Nd;;0;;;;
1E140;NYIAKENG PUACHUE HMONG DIGIT ZERO;Nd;;0;;;;
1E2F0;WANCHO DIGIT ZERO;Nd;;0;;;;
1E4F0;NAG MUNDARI DIGIT ZERO;Nd;;0;;;;
Expand Down Expand Up @@ -11777,7 +11774,6 @@ A621;VAI DIGIT ONE;Nd;;1;;;;
16A61;MRO DIGIT ONE;Nd;;1;;;;
16AC1;TANGSA DIGIT ONE;Nd;;1;;;;
16B51;PAHAWH HMONG DIGIT ONE;Nd;;1;;;;
16D71;KIRAT RAI DIGIT ONE;Nd;;1;;;;
1E141;NYIAKENG PUACHUE HMONG DIGIT ONE;Nd;;1;;;;
1E2F1;WANCHO DIGIT ONE;Nd;;1;;;;
1E4F1;NAG MUNDARI DIGIT ONE;Nd;;1;;;;
Expand Down Expand Up @@ -11889,7 +11885,6 @@ A622;VAI DIGIT TWO;Nd;;2;;;;
16A62;MRO DIGIT TWO;Nd;;2;;;;
16AC2;TANGSA DIGIT TWO;Nd;;2;;;;
16B52;PAHAWH HMONG DIGIT TWO;Nd;;2;;;;
16D72;KIRAT RAI DIGIT TWO;Nd;;2;;;;
1E142;NYIAKENG PUACHUE HMONG DIGIT TWO;Nd;;2;;;;
1E2F2;WANCHO DIGIT TWO;Nd;;2;;;;
1E4F2;NAG MUNDARI DIGIT TWO;Nd;;2;;;;
Expand Down Expand Up @@ -12004,7 +11999,6 @@ A623;VAI DIGIT THREE;Nd;;3;;;;
16A63;MRO DIGIT THREE;Nd;;3;;;;
16AC3;TANGSA DIGIT THREE;Nd;;3;;;;
16B53;PAHAWH HMONG DIGIT THREE;Nd;;3;;;;
16D73;KIRAT RAI DIGIT THREE;Nd;;3;;;;
1E143;NYIAKENG PUACHUE HMONG DIGIT THREE;Nd;;3;;;;
1E2F3;WANCHO DIGIT THREE;Nd;;3;;;;
1E4F3;NAG MUNDARI DIGIT THREE;Nd;;3;;;;
Expand Down Expand Up @@ -12116,7 +12110,6 @@ A624;VAI DIGIT FOUR;Nd;;4;;;;
16A64;MRO DIGIT FOUR;Nd;;4;;;;
16AC4;TANGSA DIGIT FOUR;Nd;;4;;;;
16B54;PAHAWH HMONG DIGIT FOUR;Nd;;4;;;;
16D74;KIRAT RAI DIGIT FOUR;Nd;;4;;;;
1E144;NYIAKENG PUACHUE HMONG DIGIT FOUR;Nd;;4;;;;
1E2F4;WANCHO DIGIT FOUR;Nd;;4;;;;
1E4F4;NAG MUNDARI DIGIT FOUR;Nd;;4;;;;
Expand Down Expand Up @@ -12227,7 +12220,6 @@ A625;VAI DIGIT FIVE;Nd;;5;;;;
16A65;MRO DIGIT FIVE;Nd;;5;;;;
16AC5;TANGSA DIGIT FIVE;Nd;;5;;;;
16B55;PAHAWH HMONG DIGIT FIVE;Nd;;5;;;;
16D75;KIRAT RAI DIGIT FIVE;Nd;;5;;;;
1E145;NYIAKENG PUACHUE HMONG DIGIT FIVE;Nd;;5;;;;
1E2F5;WANCHO DIGIT FIVE;Nd;;5;;;;
1E4F5;NAG MUNDARI DIGIT FIVE;Nd;;5;;;;
Expand Down Expand Up @@ -12333,7 +12325,6 @@ A626;VAI DIGIT SIX;Nd;;6;;;;
16A66;MRO DIGIT SIX;Nd;;6;;;;
16AC6;TANGSA DIGIT SIX;Nd;;6;;;;
16B56;PAHAWH HMONG DIGIT SIX;Nd;;6;;;;
16D76;KIRAT RAI DIGIT SIX;Nd;;6;;;;
1E146;NYIAKENG PUACHUE HMONG DIGIT SIX;Nd;;6;;;;
1E2F6;WANCHO DIGIT SIX;Nd;;6;;;;
1E4F6;NAG MUNDARI DIGIT SIX;Nd;;6;;;;
Expand Down Expand Up @@ -12427,7 +12418,6 @@ A627;VAI DIGIT SEVEN;Nd;;7;;;;
16A67;MRO DIGIT SEVEN;Nd;;7;;;;
16AC7;TANGSA DIGIT SEVEN;Nd;;7;;;;
16B57;PAHAWH HMONG DIGIT SEVEN;Nd;;7;;;;
16D77;KIRAT RAI DIGIT SEVEN;Nd;;7;;;;
1E147;NYIAKENG PUACHUE HMONG DIGIT SEVEN;Nd;;7;;;;
1E2F7;WANCHO DIGIT SEVEN;Nd;;7;;;;
1E4F7;NAG MUNDARI DIGIT SEVEN;Nd;;7;;;;
Expand Down Expand Up @@ -12522,7 +12512,6 @@ A628;VAI DIGIT EIGHT;Nd;;8;;;;
16A68;MRO DIGIT EIGHT;Nd;;8;;;;
16AC8;TANGSA DIGIT EIGHT;Nd;;8;;;;
16B58;PAHAWH HMONG DIGIT EIGHT;Nd;;8;;;;
16D78;KIRAT RAI DIGIT EIGHT;Nd;;8;;;;
1E148;NYIAKENG PUACHUE HMONG DIGIT EIGHT;Nd;;8;;;;
1E2F8;WANCHO DIGIT EIGHT;Nd;;8;;;;
1E4F8;NAG MUNDARI DIGIT EIGHT;Nd;;8;;;;
Expand Down Expand Up @@ -12616,7 +12605,6 @@ A629;VAI DIGIT NINE;Nd;;9;;;;
16A69;MRO DIGIT NINE;Nd;;9;;;;
16AC9;TANGSA DIGIT NINE;Nd;;9;;;;
16B59;PAHAWH HMONG DIGIT NINE;Nd;;9;;;;
16D79;KIRAT RAI DIGIT NINE;Nd;;9;;;;
1E149;NYIAKENG PUACHUE HMONG DIGIT NINE;Nd;;9;;;;
1E2F9;WANCHO DIGIT NINE;Nd;;9;;;;
1E4F9;NAG MUNDARI DIGIT NINE;Nd;;9;;;;
Expand Down Expand Up @@ -33624,79 +33612,6 @@ A4F7;LISU LETTER OE;Lo;;;;;;
16ABD;TANGSA LETTER CHA;Lo;;;;;;
16ABE;TANGSA LETTER ZA;Lo;;;;;;

# Kirat Rai script begins here

# After Tangsa as suggested by Ken Whistler:
# Tangsa is another recently created script used in the same general area,
# and the code point ranges are in close vicinity (deliberately).
# It will also occur in the core spec in near vicinity. Tangsa is 13.20,
# and will be followed by 13.21 Sunuwar, 13.22 Gurung Khema, 13.23 Kirat Rai.

# In Kirat Rai, these moral equivalents of candrabindu, bindi, and visarga are not combining marks,
# and should not be equated to the Devanagari combining marks.
# L2/22-043R puts them before the regular letters (in code point order).
16D40;KIRAT RAI SIGN ANUSVARA;Lm;;;;;;
16D41;KIRAT RAI SIGN TONPI;Lm;;;;;;
16D42;KIRAT RAI SIGN VISARGA;Lm;;;;;;

16D43;KIRAT RAI LETTER A;Lo;;;;;;
16D44;KIRAT RAI LETTER KA;Lo;;;;;;
16D45;KIRAT RAI LETTER KHA;Lo;;;;;;
16D46;KIRAT RAI LETTER GA;Lo;;;;;;
16D47;KIRAT RAI LETTER GHA;Lo;;;;;;
16D48;KIRAT RAI LETTER NGA;Lo;;;;;;
16D49;KIRAT RAI LETTER CA;Lo;;;;;;
16D4A;KIRAT RAI LETTER CHA;Lo;;;;;;
16D4B;KIRAT RAI LETTER JA;Lo;;;;;;
16D4C;KIRAT RAI LETTER JHA;Lo;;;;;;
16D4D;KIRAT RAI LETTER NYA;Lo;;;;;;
16D4E;KIRAT RAI LETTER TTA;Lo;;;;;;
16D4F;KIRAT RAI LETTER TTHA;Lo;;;;;;
16D50;KIRAT RAI LETTER DDA;Lo;;;;;;
16D51;KIRAT RAI LETTER DDHA;Lo;;;;;;
16D52;KIRAT RAI LETTER TA;Lo;;;;;;
16D53;KIRAT RAI LETTER THA;Lo;;;;;;
16D54;KIRAT RAI LETTER DA;Lo;;;;;;
16D55;KIRAT RAI LETTER DHA;Lo;;;;;;
16D56;KIRAT RAI LETTER NA;Lo;;;;;;
16D57;KIRAT RAI LETTER PA;Lo;;;;;;
16D58;KIRAT RAI LETTER PHA;Lo;;;;;;
16D59;KIRAT RAI LETTER BA;Lo;;;;;;
16D5A;KIRAT RAI LETTER BHA;Lo;;;;;;
16D5B;KIRAT RAI LETTER MA;Lo;;;;;;
16D5C;KIRAT RAI LETTER YA;Lo;;;;;;
16D5D;KIRAT RAI LETTER RA;Lo;;;;;;
16D5E;KIRAT RAI LETTER LA;Lo;;;;;;
16D5F;KIRAT RAI LETTER VA;Lo;;;;;;
16D60;KIRAT RAI LETTER SA;Lo;;;;;;
16D61;KIRAT RAI LETTER SHA;Lo;;;;;;
16D62;KIRAT RAI LETTER HA;Lo;;;;;;
16D63;KIRAT RAI VOWEL SIGN AA;Lo;;;;;;
16D64;KIRAT RAI VOWEL SIGN I;Lo;;;;;;
16D65;KIRAT RAI VOWEL SIGN U;Lo;;;;;;
16D66;KIRAT RAI VOWEL SIGN UE;Lo;;;;;;
16D67;KIRAT RAI VOWEL SIGN E;Lo;;;;;;

# L2/22-043R Section 4.1:
# The Kirat Rai multipart vowels are intended to collate as units, not by their decompositions.
# Enclose them in CONTRACTION..DEFAULT.

CONTRACTION

16D68;KIRAT RAI VOWEL SIGN AI;Lo;16D67 16D67;;;;;
16D69;KIRAT RAI VOWEL SIGN O;Lo;16D63 16D67;;;;;
16D6A;KIRAT RAI VOWEL SIGN AU;Lo;16D69 16D67;;;;;

DEFAULT

# L2/22-043R: Difference between Sign Virama and Sign Saat:
# Both the signs are used to mute the inherent vowel sound. [...]
# SIGN SAAT is only used to mute the inherent vowel of the first letter of the word;
# all other places are represented by SIGN VIRAMA.
# Both the signs are represented in Devanagari by virama U+094D.
16D6B;KIRAT RAI SIGN VIRAMA;Lm;;;;;;
16D6C;KIRAT RAI SIGN SAAT;Lm;<sort> 16D6B;;;;;

# Aegean syllabic scripts start here

# Linear B script starts here
Expand Down
Loading

0 comments on commit a1c9c55

Please sign in to comment.