From e3dc269bf460f025a6091a9b1260e1881ace6833 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 25 Jul 2024 17:10:28 +0200 Subject: [PATCH 01/14] UnicodeData.txt lines from L2/24-172 --- unicodetools/data/ucd/dev/UnicodeData.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index 64258a373..fc81954b1 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,3 +1,9 @@ +1D250;MUSICAL SYMBOL COMBINING FLAG-6;Mc;216;L;;;;;N;;;;; +1D251;MUSICAL SYMBOL COMBINING FLAG-7;Mc;216;L;;;;;N;;;;; +1D252;MUSICAL SYMBOL COMBINING FLAG-8;Mc;216;L;;;;;N;;;;; +1D253;MUSICAL SYMBOL TWO HUNDRED FIFTY-SIXTH REST;So;0;L;;;;;N;;;;; +1D254;MUSICAL SYMBOL FIVE HUNDRED TWELFTH REST;So;0;L;;;;;N;;;;; +1D255;MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST;So;0;L;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; From 9aac41dd51f984a654eb05a049262bb126ca8d80 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 25 Jul 2024 17:15:45 +0200 Subject: [PATCH 02/14] lb=CM like the existing flags, lb=AL like the existing rests --- unicodetools/data/ucd/dev/LineBreak.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/unicodetools/data/ucd/dev/LineBreak.txt b/unicodetools/data/ucd/dev/LineBreak.txt index 9dc61d95d..62a88d4d4 100644 --- a/unicodetools/data/ucd/dev/LineBreak.txt +++ b/unicodetools/data/ucd/dev/LineBreak.txt @@ -1,5 +1,5 @@ # LineBreak-16.0.0.txt -# Date: 2024-05-11, 16:57:19 GMT +# Date: 2024-07-25, 15:12:30 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -3321,6 +3321,8 @@ FFFD ; AI # So REPLACEMENT CHARACTER 1D200..1D241 ; AL # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 1D242..1D244 ; CM # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME 1D245 ; AL # So GREEK MUSICAL LEIMMA +1D250..1D252 ; CM # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 +1D253..1D255 ; AL # So [3] MUSICAL SYMBOL TWO HUNDRED FIFTY-SIXTH REST..MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST 1D2C0..1D2D3 ; AL # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN 1D2E0..1D2F3 ; AL # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN 1D300..1D356 ; AL # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING From 7e3f781101b5f80f62dbc2b2a4e3b4bd8aff6423 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 25 Jul 2024 17:16:44 +0200 Subject: [PATCH 03/14] Common --- unicodetools/data/ucd/dev/Scripts.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 443a6d2dd..5e340ac82 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,3 +1,5 @@ +1D250..1D252;Common +1D253..1D255;Common # Scripts-16.0.0.txt # Date: 2024-04-30, 21:48:40 GMT # © 2024 Unicode®, Inc. From d64317566eacc837e93820a465d34574dfdf2f11 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 25 Jul 2024 17:17:55 +0200 Subject: [PATCH 04/14] ShortBlockNames.txt --- .../src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt index eaa03a0f7..f4f631f72 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/ShortBlockNames.txt @@ -215,6 +215,7 @@ Mongolian_Sup ; Mongolian_Supplement Mro ; Mro Multani ; Multani Music ; Musical_Symbols +Music_Sup ; Musical_Symbols_Supplement Myanmar ; Myanmar Myanmar_Ext_A ; Myanmar_Extended_A Myanmar_Ext_B ; Myanmar_Extended_B From 59181c791af6ea3a9a259a52c55315424f5c288e Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 25 Jul 2024 17:18:41 +0200 Subject: [PATCH 05/14] Blocks.txt --- unicodetools/data/ucd/dev/Blocks.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/Blocks.txt b/unicodetools/data/ucd/dev/Blocks.txt index 1517dde0c..910e458cb 100644 --- a/unicodetools/data/ucd/dev/Blocks.txt +++ b/unicodetools/data/ucd/dev/Blocks.txt @@ -322,6 +322,7 @@ FFF0..FFFF; Specials 1D000..1D0FF; Byzantine Musical Symbols 1D100..1D1FF; Musical Symbols 1D200..1D24F; Ancient Greek Musical Notation +1D250..1D28F; Musical Symbols Supplement 1D2C0..1D2DF; Kaktovik Numerals 1D2E0..1D2FF; Mayan Numerals 1D300..1D35F; Tai Xuan Jing Symbols From 5196a3e2f70199fd4ec1d060b47433060d323579 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 25 Jul 2024 17:20:25 +0200 Subject: [PATCH 06/14] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedAge.txt | 5 +++-- unicodetools/data/ucd/dev/DerivedCoreProperties.txt | 12 ++++++++---- unicodetools/data/ucd/dev/EastAsianWidth.txt | 4 +++- unicodetools/data/ucd/dev/NormalizationTest.txt | 8 +++++++- unicodetools/data/ucd/dev/PropertyValueAliases.txt | 3 ++- unicodetools/data/ucd/dev/Scripts.txt | 8 ++++---- unicodetools/data/ucd/dev/UnicodeData.txt | 12 ++++++------ unicodetools/data/ucd/dev/VerticalOrientation.txt | 4 +++- .../ucd/dev/auxiliary/GraphemeBreakProperty.txt | 5 +++-- .../ucd/dev/auxiliary/SentenceBreakProperty.txt | 5 +++-- .../data/ucd/dev/auxiliary/WordBreakProperty.txt | 5 +++-- .../data/ucd/dev/extracted/DerivedBidiClass.txt | 6 ++++-- .../ucd/dev/extracted/DerivedCombiningClass.txt | 10 ++++++---- .../ucd/dev/extracted/DerivedEastAsianWidth.txt | 6 ++++-- .../ucd/dev/extracted/DerivedGeneralCategory.txt | 13 ++++++++----- .../data/ucd/dev/extracted/DerivedLineBreak.txt | 12 +++++++----- unicodetools/data/ucd/dev/extracted/DerivedName.txt | 10 ++++++++-- 17 files changed, 82 insertions(+), 46 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedAge.txt b/unicodetools/data/ucd/dev/DerivedAge.txt index b4dcd2e48..f7fd3228d 100644 --- a/unicodetools/data/ucd/dev/DerivedAge.txt +++ b/unicodetools/data/ucd/dev/DerivedAge.txt @@ -1,5 +1,5 @@ # DerivedAge-16.0.0.txt -# Date: 2024-04-30, 21:48:12 GMT +# Date: 2024-07-25, 15:19:16 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2044,6 +2044,7 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 18CFF ; 16.0 # KHITAN SMALL SCRIPT CHARACTER-18CFF 1CC00..1CCF9 ; 16.0 # [250] UP-POINTING GO-KART..OUTLINED DIGIT NINE 1CD00..1CEB3 ; 16.0 # [436] BLOCK OCTANT-3..BLACK RIGHT TRIANGLE CARET +1D250..1D255 ; 16.0 # [6] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST 1E5D0..1E5FA ; 16.0 # [43] OL ONAL LETTER O..OL ONAL DIGIT NINE 1E5FF ; 16.0 # OL ONAL ABBREVIATION SIGN 1F8B2..1F8BB ; 16.0 # [10] RIGHTWARDS ARROW WITH LOWER HOOK..SOUTH WEST ARROW FROM BAR @@ -2057,6 +2058,6 @@ A7DA..A7DC ; 16.0 # [3] LATIN CAPITAL LETTER LAMBDA..LATIN CAPITAL LETTER L 1FAE9 ; 16.0 # FACE WITH BAGS UNDER EYES 1FBCB..1FBEF ; 16.0 # [37] WHITE CROSS MARK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 5185 +# Total code points: 5191 # EOF diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 1075638f1..6806d4a02 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2024-05-31, 18:09:32 GMT +# Date: 2024-07-25, 15:19:41 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -8249,6 +8249,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 1D185..1D18B ; ID_Continue # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD ; ID_Continue # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO 1D242..1D244 ; ID_Continue # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D250..1D252 ; ID_Continue # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 1D400..1D454 ; ID_Continue # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; ID_Continue # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A 1D49E..1D49F ; ID_Continue # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D @@ -8370,7 +8371,7 @@ FFDA..FFDC ; ID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HAN 31350..323AF ; ID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; ID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144541 +# Total code points: 144544 # ================================================ @@ -10436,6 +10437,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 1D185..1D18B ; XID_Continue # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD ; XID_Continue # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO 1D242..1D244 ; XID_Continue # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D250..1D252 ; XID_Continue # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 1D400..1D454 ; XID_Continue # L& [85] MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G 1D456..1D49C ; XID_Continue # L& [71] MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A 1D49E..1D49F ; XID_Continue # L& [2] MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D @@ -10557,7 +10559,7 @@ FFDA..FFDC ; XID_Continue # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HA 31350..323AF ; XID_Continue # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF E0100..E01EF ; XID_Continue # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 144522 +# Total code points: 144525 # ================================================ @@ -12638,6 +12640,8 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1D1AE..1D1EA ; Grapheme_Base # So [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON 1D200..1D241 ; Grapheme_Base # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 1D245 ; Grapheme_Base # So GREEK MUSICAL LEIMMA +1D250..1D252 ; Grapheme_Base # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 +1D253..1D255 ; Grapheme_Base # So [3] MUSICAL SYMBOL TWO HUNDRED FIFTY-SIXTH REST..MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST 1D2C0..1D2D3 ; Grapheme_Base # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN 1D2E0..1D2F3 ; Grapheme_Base # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN 1D300..1D356 ; Grapheme_Base # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING @@ -12812,7 +12816,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152730 +# Total code points: 152736 # ================================================ diff --git a/unicodetools/data/ucd/dev/EastAsianWidth.txt b/unicodetools/data/ucd/dev/EastAsianWidth.txt index 99f7a31ea..6cd81f63a 100644 --- a/unicodetools/data/ucd/dev/EastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/EastAsianWidth.txt @@ -1,5 +1,5 @@ # EastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:20 GMT +# Date: 2024-07-25, 15:19:48 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2406,6 +2406,8 @@ FFFD ; A # So REPLACEMENT CHARACTER 1D200..1D241 ; N # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 1D242..1D244 ; N # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME 1D245 ; N # So GREEK MUSICAL LEIMMA +1D250..1D252 ; N # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 +1D253..1D255 ; N # So [3] MUSICAL SYMBOL TWO HUNDRED FIFTY-SIXTH REST..MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST 1D2C0..1D2D3 ; N # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN 1D2E0..1D2F3 ; N # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN 1D300..1D356 ; W # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING diff --git a/unicodetools/data/ucd/dev/NormalizationTest.txt b/unicodetools/data/ucd/dev/NormalizationTest.txt index 3aae8f72e..e70bc5475 100644 --- a/unicodetools/data/ucd/dev/NormalizationTest.txt +++ b/unicodetools/data/ucd/dev/NormalizationTest.txt @@ -1,5 +1,5 @@ # NormalizationTest-16.0.0.txt -# Date: 2024-04-30, 21:48:23 GMT +# Date: 2024-07-25, 15:19:53 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -18904,6 +18904,12 @@ FFEE;FFEE;FFEE;25CB;25CB; # (○; ○; ○; ○; ○; ) HALFWIDTH WHITE CIRCLE 0061 1D243 0315 0300 05AE 0062;0061 05AE 1D243 0300 0315 0062;0061 05AE 1D243 0300 0315 0062;0061 05AE 1D243 0300 0315 0062;0061 05AE 1D243 0300 0315 0062; # (a◌𝉃◌̕◌̀◌֮b; a◌֮◌𝉃◌̀◌̕b; a◌֮◌𝉃◌̀◌̕b; a◌֮◌𝉃◌̀◌̕b; a◌֮◌𝉃◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING GREEK MUSICAL TETRASEME, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 1D244 0062;00E0 05AE 1D244 0315 0062;0061 05AE 0300 1D244 0315 0062;00E0 05AE 1D244 0315 0062;0061 05AE 0300 1D244 0315 0062; # (a◌̕◌̀◌֮◌𝉄b; à◌֮◌𝉄◌̕b; a◌֮◌̀◌𝉄◌̕b; à◌֮◌𝉄◌̕b; a◌֮◌̀◌𝉄◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING GREEK MUSICAL PENTASEME, LATIN SMALL LETTER B 0061 1D244 0315 0300 05AE 0062;0061 05AE 1D244 0300 0315 0062;0061 05AE 1D244 0300 0315 0062;0061 05AE 1D244 0300 0315 0062;0061 05AE 1D244 0300 0315 0062; # (a◌𝉄◌̕◌̀◌֮b; a◌֮◌𝉄◌̀◌̕b; a◌֮◌𝉄◌̀◌̕b; a◌֮◌𝉄◌̀◌̕b; a◌֮◌𝉄◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING GREEK MUSICAL PENTASEME, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B +0061 1DFA 031B 1DCE 1D250 0062;0061 1DCE 031B 1D250 1DFA 0062;0061 1DCE 031B 1D250 1DFA 0062;0061 1DCE 031B 1D250 1DFA 0062;0061 1DCE 031B 1D250 1DFA 0062; # (a◌᷺◌̛◌᷎𝉐b; a◌᷎◌̛𝉐◌᷺b; a◌᷎◌̛𝉐◌᷺b; a◌᷎◌̛𝉐◌᷺b; a◌᷎◌̛𝉐◌᷺b; ) LATIN SMALL LETTER A, COMBINING DOT BELOW LEFT, COMBINING HORN, COMBINING OGONEK ABOVE, MUSICAL SYMBOL COMBINING FLAG-6, LATIN SMALL LETTER B +0061 1D250 1DFA 031B 1DCE 0062;0061 1DCE 1D250 031B 1DFA 0062;0061 1DCE 1D250 031B 1DFA 0062;0061 1DCE 1D250 031B 1DFA 0062;0061 1DCE 1D250 031B 1DFA 0062; # (a𝉐◌᷺◌̛◌᷎b; a◌᷎𝉐◌̛◌᷺b; a◌᷎𝉐◌̛◌᷺b; a◌᷎𝉐◌̛◌᷺b; a◌᷎𝉐◌̛◌᷺b; ) LATIN SMALL LETTER A, MUSICAL SYMBOL COMBINING FLAG-6, COMBINING DOT BELOW LEFT, COMBINING HORN, COMBINING OGONEK ABOVE, LATIN SMALL LETTER B +0061 1DFA 031B 1DCE 1D251 0062;0061 1DCE 031B 1D251 1DFA 0062;0061 1DCE 031B 1D251 1DFA 0062;0061 1DCE 031B 1D251 1DFA 0062;0061 1DCE 031B 1D251 1DFA 0062; # (a◌᷺◌̛◌᷎𝉑b; a◌᷎◌̛𝉑◌᷺b; a◌᷎◌̛𝉑◌᷺b; a◌᷎◌̛𝉑◌᷺b; a◌᷎◌̛𝉑◌᷺b; ) LATIN SMALL LETTER A, COMBINING DOT BELOW LEFT, COMBINING HORN, COMBINING OGONEK ABOVE, MUSICAL SYMBOL COMBINING FLAG-7, LATIN SMALL LETTER B +0061 1D251 1DFA 031B 1DCE 0062;0061 1DCE 1D251 031B 1DFA 0062;0061 1DCE 1D251 031B 1DFA 0062;0061 1DCE 1D251 031B 1DFA 0062;0061 1DCE 1D251 031B 1DFA 0062; # (a𝉑◌᷺◌̛◌᷎b; a◌᷎𝉑◌̛◌᷺b; a◌᷎𝉑◌̛◌᷺b; a◌᷎𝉑◌̛◌᷺b; a◌᷎𝉑◌̛◌᷺b; ) LATIN SMALL LETTER A, MUSICAL SYMBOL COMBINING FLAG-7, COMBINING DOT BELOW LEFT, COMBINING HORN, COMBINING OGONEK ABOVE, LATIN SMALL LETTER B +0061 1DFA 031B 1DCE 1D252 0062;0061 1DCE 031B 1D252 1DFA 0062;0061 1DCE 031B 1D252 1DFA 0062;0061 1DCE 031B 1D252 1DFA 0062;0061 1DCE 031B 1D252 1DFA 0062; # (a◌᷺◌̛◌᷎𝉒b; a◌᷎◌̛𝉒◌᷺b; a◌᷎◌̛𝉒◌᷺b; a◌᷎◌̛𝉒◌᷺b; a◌᷎◌̛𝉒◌᷺b; ) LATIN SMALL LETTER A, COMBINING DOT BELOW LEFT, COMBINING HORN, COMBINING OGONEK ABOVE, MUSICAL SYMBOL COMBINING FLAG-8, LATIN SMALL LETTER B +0061 1D252 1DFA 031B 1DCE 0062;0061 1DCE 1D252 031B 1DFA 0062;0061 1DCE 1D252 031B 1DFA 0062;0061 1DCE 1D252 031B 1DFA 0062;0061 1DCE 1D252 031B 1DFA 0062; # (a𝉒◌᷺◌̛◌᷎b; a◌᷎𝉒◌̛◌᷺b; a◌᷎𝉒◌̛◌᷺b; a◌᷎𝉒◌̛◌᷺b; a◌᷎𝉒◌̛◌᷺b; ) LATIN SMALL LETTER A, MUSICAL SYMBOL COMBINING FLAG-8, COMBINING DOT BELOW LEFT, COMBINING HORN, COMBINING OGONEK ABOVE, LATIN SMALL LETTER B 0061 0315 0300 05AE 1E000 0062;00E0 05AE 1E000 0315 0062;0061 05AE 0300 1E000 0315 0062;00E0 05AE 1E000 0315 0062;0061 05AE 0300 1E000 0315 0062; # (a◌̕◌̀◌֮◌𞀀b; à◌֮◌𞀀◌̕b; a◌֮◌̀◌𞀀◌̕b; à◌֮◌𞀀◌̕b; a◌֮◌̀◌𞀀◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING GLAGOLITIC LETTER AZU, LATIN SMALL LETTER B 0061 1E000 0315 0300 05AE 0062;0061 05AE 1E000 0300 0315 0062;0061 05AE 1E000 0300 0315 0062;0061 05AE 1E000 0300 0315 0062;0061 05AE 1E000 0300 0315 0062; # (a◌𞀀◌̕◌̀◌֮b; a◌֮◌𞀀◌̀◌̕b; a◌֮◌𞀀◌̀◌̕b; a◌֮◌𞀀◌̀◌̕b; a◌֮◌𞀀◌̀◌̕b; ) LATIN SMALL LETTER A, COMBINING GLAGOLITIC LETTER AZU, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, LATIN SMALL LETTER B 0061 0315 0300 05AE 1E001 0062;00E0 05AE 1E001 0315 0062;0061 05AE 0300 1E001 0315 0062;00E0 05AE 1E001 0315 0062;0061 05AE 0300 1E001 0315 0062; # (a◌̕◌̀◌֮◌𞀁b; à◌֮◌𞀁◌̕b; a◌֮◌̀◌𞀁◌̕b; à◌֮◌𞀁◌̕b; a◌֮◌̀◌𞀁◌̕b; ) LATIN SMALL LETTER A, COMBINING COMMA ABOVE RIGHT, COMBINING GRAVE ACCENT, HEBREW ACCENT ZINOR, COMBINING GLAGOLITIC LETTER BUKY, LATIN SMALL LETTER B diff --git a/unicodetools/data/ucd/dev/PropertyValueAliases.txt b/unicodetools/data/ucd/dev/PropertyValueAliases.txt index 0c42297a1..f88f13b2a 100644 --- a/unicodetools/data/ucd/dev/PropertyValueAliases.txt +++ b/unicodetools/data/ucd/dev/PropertyValueAliases.txt @@ -1,5 +1,5 @@ # PropertyValueAliases-16.0.0.txt -# Date: 2024-06-06, 22:21:34 GMT +# Date: 2024-07-25, 15:20:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -369,6 +369,7 @@ blk; Mongolian_Sup ; Mongolian_Supplement blk; Mro ; Mro blk; Multani ; Multani blk; Music ; Musical_Symbols +blk; Music_Sup ; Musical_Symbols_Supplement blk; Myanmar ; Myanmar blk; Myanmar_Ext_A ; Myanmar_Extended_A blk; Myanmar_Ext_B ; Myanmar_Extended_B diff --git a/unicodetools/data/ucd/dev/Scripts.txt b/unicodetools/data/ucd/dev/Scripts.txt index 5e340ac82..9c40a4a37 100644 --- a/unicodetools/data/ucd/dev/Scripts.txt +++ b/unicodetools/data/ucd/dev/Scripts.txt @@ -1,7 +1,5 @@ -1D250..1D252;Common -1D253..1D255;Common # Scripts-16.0.0.txt -# Date: 2024-04-30, 21:48:40 GMT +# Date: 2024-07-25, 15:20:13 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -538,6 +536,8 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR 1D183..1D184 ; Common # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN 1D18C..1D1A9 ; Common # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH 1D1AE..1D1EA ; Common # So [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON +1D250..1D252 ; Common # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 +1D253..1D255 ; Common # So [3] MUSICAL SYMBOL TWO HUNDRED FIFTY-SIXTH REST..MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST 1D2C0..1D2D3 ; Common # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN 1D2E0..1D2F3 ; Common # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN 1D300..1D356 ; Common # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING @@ -635,7 +635,7 @@ FFFC..FFFD ; Common # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHAR E0001 ; Common # Cf LANGUAGE TAG E0020..E007F ; Common # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 9053 +# Total code points: 9059 # ================================================ diff --git a/unicodetools/data/ucd/dev/UnicodeData.txt b/unicodetools/data/ucd/dev/UnicodeData.txt index fc81954b1..53c24298f 100644 --- a/unicodetools/data/ucd/dev/UnicodeData.txt +++ b/unicodetools/data/ucd/dev/UnicodeData.txt @@ -1,9 +1,3 @@ -1D250;MUSICAL SYMBOL COMBINING FLAG-6;Mc;216;L;;;;;N;;;;; -1D251;MUSICAL SYMBOL COMBINING FLAG-7;Mc;216;L;;;;;N;;;;; -1D252;MUSICAL SYMBOL COMBINING FLAG-8;Mc;216;L;;;;;N;;;;; -1D253;MUSICAL SYMBOL TWO HUNDRED FIFTY-SIXTH REST;So;0;L;;;;;N;;;;; -1D254;MUSICAL SYMBOL FIVE HUNDRED TWELFTH REST;So;0;L;;;;;N;;;;; -1D255;MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST;So;0;L;;;;;N;;;;; 0000;;Cc;0;BN;;;;;N;NULL;;;; 0001;;Cc;0;BN;;;;;N;START OF HEADING;;;; 0002;;Cc;0;BN;;;;;N;START OF TEXT;;;; @@ -33805,6 +33799,12 @@ FFFD;REPLACEMENT CHARACTER;So;0;ON;;;;;N;;;;; 1D243;COMBINING GREEK MUSICAL TETRASEME;Mn;230;NSM;;;;;N;;;;; 1D244;COMBINING GREEK MUSICAL PENTASEME;Mn;230;NSM;;;;;N;;;;; 1D245;GREEK MUSICAL LEIMMA;So;0;ON;;;;;N;;;;; +1D250;MUSICAL SYMBOL COMBINING FLAG-6;Mc;216;L;;;;;N;;;;; +1D251;MUSICAL SYMBOL COMBINING FLAG-7;Mc;216;L;;;;;N;;;;; +1D252;MUSICAL SYMBOL COMBINING FLAG-8;Mc;216;L;;;;;N;;;;; +1D253;MUSICAL SYMBOL TWO HUNDRED FIFTY-SIXTH REST;So;0;L;;;;;N;;;;; +1D254;MUSICAL SYMBOL FIVE HUNDRED TWELFTH REST;So;0;L;;;;;N;;;;; +1D255;MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST;So;0;L;;;;;N;;;;; 1D2C0;KAKTOVIK NUMERAL ZERO;No;0;L;;;;0;N;;;;; 1D2C1;KAKTOVIK NUMERAL ONE;No;0;L;;;;1;N;;;;; 1D2C2;KAKTOVIK NUMERAL TWO;No;0;L;;;;2;N;;;;; diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 1ebcd7228..35b86d140 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -1,5 +1,5 @@ # VerticalOrientation-16.0.0.txt -# Date: 2024-04-30, 21:48:42 GMT +# Date: 2024-07-25, 15:20:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2254,6 +2254,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1D200..1D241 ; R # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 1D242..1D244 ; R # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME 1D245 ; R # So GREEK MUSICAL LEIMMA +1D250..1D252 ; R # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 +1D253..1D255 ; R # So [3] MUSICAL SYMBOL TWO HUNDRED FIFTY-SIXTH REST..MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST 1D2C0..1D2D3 ; R # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN 1D2E0..1D2F3 ; U # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN 1D2F4..1D2FF ; U # Cn [12] .. diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index a863397dd..fb56b975c 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ # GraphemeBreakProperty-16.0.0.txt -# Date: 2024-05-31, 18:09:38 GMT +# Date: 2024-07-25, 15:19:48 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -660,8 +660,9 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 11F3E..11F3F ; SpacingMark # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI 1612A..1612C ; SpacingMark # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA 16F51..16F87 ; SpacingMark # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI +1D250..1D252 ; SpacingMark # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 -# Total code points: 378 +# Total code points: 381 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 39fdb57c0..658ca97d7 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2024-05-13, 20:53:44 GMT +# Date: 2024-07-25, 15:20:14 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -564,6 +564,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1D185..1D18B ; Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD ; Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO 1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D250..1D252 ; Extend # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 1DA00..1DA36 ; Extend # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN 1DA3B..1DA6C ; Extend # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT 1DA75 ; Extend # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS @@ -586,7 +587,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2601 +# Total code points: 2604 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt index 925ea3c48..54916db2b 100644 --- a/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/WordBreakProperty.txt @@ -1,5 +1,5 @@ # WordBreakProperty-16.0.0.txt -# Date: 2024-04-30, 21:48:43 GMT +# Date: 2024-07-25, 15:20:17 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -600,6 +600,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1D185..1D18B ; Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD ; Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO 1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D250..1D252 ; Extend # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 1DA00..1DA36 ; Extend # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN 1DA3B..1DA6C ; Extend # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT 1DA75 ; Extend # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS @@ -623,7 +624,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2605 +# Total code points: 2608 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt index 2aceac0aa..daef42fcd 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedBidiClass.txt @@ -1,5 +1,5 @@ # DerivedBidiClass-16.0.0.txt -# Date: 2024-04-30, 21:48:13 GMT +# Date: 2024-07-25, 15:19:38 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1132,6 +1132,8 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER 1D183..1D184 ; L # So [2] MUSICAL SYMBOL ARPEGGIATO UP..MUSICAL SYMBOL ARPEGGIATO DOWN 1D18C..1D1A9 ; L # So [30] MUSICAL SYMBOL RINFORZANDO..MUSICAL SYMBOL DEGREE SLASH 1D1AE..1D1E8 ; L # So [59] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KIEVAN FLAT SIGN +1D250..1D252 ; L # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 +1D253..1D255 ; L # So [3] MUSICAL SYMBOL TWO HUNDRED FIFTY-SIXTH REST..MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST 1D2C0..1D2D3 ; L # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN 1D2E0..1D2F3 ; L # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN 1D360..1D378 ; L # No [25] COUNTING ROD UNIT DIGIT ONE..TALLY MARK FIVE @@ -1214,7 +1216,7 @@ FFDA..FFDC ; L # Lo [3] HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER F0000..FFFFD ; L # Co [65534] .. 100000..10FFFD; L # Co [65534] .. -# The above property value applies to 815351 code points not listed here. +# The above property value applies to 815345 code points not listed here. # Total code points: 1095513 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt index a5d57af96..a9cae222b 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedCombiningClass.txt @@ -1,5 +1,5 @@ # DerivedCombiningClass-16.0.0.txt -# Date: 2024-04-30, 21:48:15 GMT +# Date: 2024-07-25, 15:19:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1875,6 +1875,7 @@ FFFC..FFFD ; 0 # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1D1AE..1D1EA ; 0 # So [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON 1D200..1D241 ; 0 # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 1D245 ; 0 # So GREEK MUSICAL LEIMMA +1D253..1D255 ; 0 # So [3] MUSICAL SYMBOL TWO HUNDRED FIFTY-SIXTH REST..MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST 1D2C0..1D2D3 ; 0 # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN 1D2E0..1D2F3 ; 0 # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN 1D300..1D356 ; 0 # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING @@ -2060,8 +2061,8 @@ E0100..E01EF ; 0 # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 F0000..FFFFD ; 0 # Co [65534] .. 100000..10FFFD; 0 # Co [65534] .. -# The above property value applies to 821581 code points not listed here. -# Total code points: 1113178 +# The above property value applies to 821575 code points not listed here. +# Total code points: 1113175 # ================================================ @@ -2524,8 +2525,9 @@ FB1E ; 26 # Mn HEBREW POINT JUDEO-SPANISH VARIKA 0F39 ; 216 # Mn TIBETAN MARK TSA -PHRU 1D165..1D166 ; 216 # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16E..1D172 ; 216 # Mc [5] MUSICAL SYMBOL COMBINING FLAG-1..MUSICAL SYMBOL COMBINING FLAG-5 +1D250..1D252 ; 216 # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 -# Total code points: 9 +# Total code points: 12 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt index cc1d91aaa..aa72f45ce 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedEastAsianWidth.txt @@ -1,5 +1,5 @@ # DerivedEastAsianWidth-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# Date: 2024-07-25, 15:19:44 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1905,6 +1905,8 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER 1D200..1D241 ; N # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 1D242..1D244 ; N # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME 1D245 ; N # So GREEK MUSICAL LEIMMA +1D250..1D252 ; N # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 +1D253..1D255 ; N # So [3] MUSICAL SYMBOL TWO HUNDRED FIFTY-SIXTH REST..MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST 1D2C0..1D2D3 ; N # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN 1D2E0..1D2F3 ; N # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN 1D377..1D378 ; N # No [2] TALLY MARK ONE..TALLY MARK FIVE @@ -2103,7 +2105,7 @@ FFFC ; N # So OBJECT REPLACEMENT CHARACTER E0001 ; N # Cf LANGUAGE TAG E0020..E007F ; N # Cf [96] TAG SPACE..CANCEL TAG -# The above property value applies to 761099 code points not listed here. +# The above property value applies to 761093 code points not listed here. # Total code points: 792420 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt index 07bf7bca9..759cbe1d3 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedGeneralCategory.txt @@ -1,5 +1,5 @@ # DerivedGeneralCategory-16.0.0.txt -# Date: 2024-04-30, 21:48:17 GMT +# Date: 2024-07-25, 15:19:44 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -604,7 +604,8 @@ FFFE..FFFF ; Cn # [2] .. 1D0F6..1D0FF ; Cn # [10] .. 1D127..1D128 ; Cn # [2] .. 1D1EB..1D1FF ; Cn # [21] .. -1D246..1D2BF ; Cn # [122] .. +1D246..1D24F ; Cn # [10] .. +1D256..1D2BF ; Cn # [106] .. 1D2D4..1D2DF ; Cn # [12] .. 1D2F4..1D2FF ; Cn # [12] .. 1D357..1D35F ; Cn # [9] .. @@ -747,7 +748,7 @@ E01F0..EFFFF ; Cn # [65040] .. FFFFE..FFFFF ; Cn # [2] .. 10FFFE..10FFFF; Cn # [2] .. -# Total code points: 819533 +# Total code points: 819527 # ================================================ @@ -3280,8 +3281,9 @@ ABEC ; Mc # MEETEI MAYEK LUM IYEK 16FF0..16FF1 ; Mc # [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 1D165..1D166 ; Mc # [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D..1D172 ; Mc # [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D250..1D252 ; Mc # [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 -# Total code points: 468 +# Total code points: 471 # ================================================ @@ -4236,6 +4238,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1D1AE..1D1EA ; So # [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON 1D200..1D241 ; So # [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 1D245 ; So # GREEK MUSICAL LEIMMA +1D253..1D255 ; So # [3] MUSICAL SYMBOL TWO HUNDRED FIFTY-SIXTH REST..MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST 1D300..1D356 ; So # [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING 1D800..1D9FF ; So # [512] SIGNWRITING HAND-FIST INDEX..SIGNWRITING HEAD 1DA37..1DA3A ; So # [4] SIGNWRITING AIR BLOW SMALL ROTATIONS..SIGNWRITING BREATH EXHALE @@ -4283,7 +4286,7 @@ FFFC..FFFD ; So # [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARACTER 1FB00..1FB92 ; So # [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; So # [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 7376 +# Total code points: 7379 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt index 866536783..98fcbaf06 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedLineBreak.txt @@ -1,5 +1,5 @@ # DerivedLineBreak-16.0.0.txt -# Date: 2024-05-11, 16:57:14 GMT +# Date: 2024-07-25, 15:19:45 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -69,8 +69,8 @@ E000..F8FF ; XX # Co [6400] .. F0000..FFFFD ; XX # Co [65534] .. 100000..10FFFD; XX # Co [65534] .. -# The above property value applies to 757653 code points not listed here. -# Total code points: 895121 +# The above property value applies to 757647 code points not listed here. +# Total code points: 895115 # ================================================ @@ -1468,6 +1468,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1D1AE..1D1EA ; AL # So [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON 1D200..1D241 ; AL # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 1D245 ; AL # So GREEK MUSICAL LEIMMA +1D253..1D255 ; AL # So [3] MUSICAL SYMBOL TWO HUNDRED FIFTY-SIXTH REST..MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST 1D2C0..1D2D3 ; AL # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN 1D2E0..1D2F3 ; AL # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN 1D300..1D356 ; AL # So [87] MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING @@ -1615,7 +1616,7 @@ FFED..FFEE ; AL # So [2] HALFWIDTH BLACK SQUARE..HALFWIDTH WHITE CIRCLE 1FB00..1FB92 ; AL # So [147] BLOCK SEXTANT-1..UPPER HALF INVERSE MEDIUM SHADE AND LOWER HALF BLOCK 1FB94..1FBEF ; AL # So [92] LEFT HALF INVERSE MEDIUM SHADE AND RIGHT HALF BLOCK..TOP LEFT JUSTIFIED LOWER RIGHT QUARTER BLACK CIRCLE -# Total code points: 26690 +# Total code points: 26693 # ================================================ @@ -2366,6 +2367,7 @@ FFF9..FFFB ; CM # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTAT 1D185..1D18B ; CM # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD ; CM # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO 1D242..1D244 ; CM # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D250..1D252 ; CM # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 1DA00..1DA36 ; CM # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN 1DA3B..1DA6C ; CM # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT 1DA75 ; CM # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS @@ -2389,7 +2391,7 @@ E0001 ; CM # Cf LANGUAGE TAG E0020..E007F ; CM # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; CM # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2470 +# Total code points: 2473 # ================================================ diff --git a/unicodetools/data/ucd/dev/extracted/DerivedName.txt b/unicodetools/data/ucd/dev/extracted/DerivedName.txt index 07b0176b5..b4ab4cc21 100644 --- a/unicodetools/data/ucd/dev/extracted/DerivedName.txt +++ b/unicodetools/data/ucd/dev/extracted/DerivedName.txt @@ -1,5 +1,5 @@ # DerivedName-16.0.0.txt -# Date: 2024-04-30, 21:48:18 GMT +# Date: 2024-07-25, 15:19:46 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -39604,6 +39604,12 @@ FFFD ; REPLACEMENT CHARACTER 1D243 ; COMBINING GREEK MUSICAL TETRASEME 1D244 ; COMBINING GREEK MUSICAL PENTASEME 1D245 ; GREEK MUSICAL LEIMMA +1D250 ; MUSICAL SYMBOL COMBINING FLAG-6 +1D251 ; MUSICAL SYMBOL COMBINING FLAG-7 +1D252 ; MUSICAL SYMBOL COMBINING FLAG-8 +1D253 ; MUSICAL SYMBOL TWO HUNDRED FIFTY-SIXTH REST +1D254 ; MUSICAL SYMBOL FIVE HUNDRED TWELFTH REST +1D255 ; MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST 1D2C0 ; KAKTOVIK NUMERAL ZERO 1D2C1 ; KAKTOVIK NUMERAL ONE 1D2C2 ; KAKTOVIK NUMERAL TWO @@ -45367,6 +45373,6 @@ E01ED ; VARIATION SELECTOR-254 E01EE ; VARIATION SELECTOR-255 E01EF ; VARIATION SELECTOR-256 -# Total code points: 154998 +# Total code points: 155004 # EOF From b2713701b3aeb573757b263f9c277f617323fada Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 25 Jul 2024 17:21:02 +0200 Subject: [PATCH 07/14] GenerateEnums --- .../src/main/java/org/unicode/props/UcdPropertyValues.java | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java index 37020e727..f2ea2feab 100644 --- a/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java +++ b/unicodetools/src/main/java/org/unicode/props/UcdPropertyValues.java @@ -394,6 +394,7 @@ public enum Block_Values implements Named { Mro("Mro"), Multani("Multani"), Musical_Symbols("Music"), + Musical_Symbols_Supplement("Music_Sup"), Myanmar("Myanmar"), Myanmar_Extended_A("Myanmar_Ext_A"), Myanmar_Extended_B("Myanmar_Ext_B"), From 778542226ace80de5dcbade17737bdb6553aedc7 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 25 Jul 2024 17:48:04 +0200 Subject: [PATCH 08/14] Other_Grapheme_Extend for consistency of LGCs with canonical equivalence --- unicodetools/data/ucd/dev/PropList.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index fae2831e7..bbf3e48a5 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,3 +1,4 @@ +1D250..1D252; Other_Grapheme_Extend # PropList-16.0.0.txt # Date: 2024-05-31, 18:09:48 GMT # © 2024 Unicode®, Inc. From ddf11017e8d0d4fefb0687aa9ef2f5d78d28b40e Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 25 Jul 2024 17:52:05 +0200 Subject: [PATCH 09/14] =?UTF-8?q?A=20highly=20unreadable=20invariant?= =?UTF-8?q?=E2=80=A6?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../org/unicode/text/UCD/UnicodeInvariantTest.txt | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt index 16ef91ae4..0739510d8 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/UnicodeInvariantTest.txt @@ -657,7 +657,12 @@ Let $anyNumericValue := \p{Numeric_Value=/-?[0-9]+(.[0-9]+)?/} # Musical symbol combining marks, other oddities -Let $AlphaExclusions := [[\uAA7D \u0F3E\u0F3F\u1063\u1064\u1069-\u106D\u1087-\u108C\u108F\u109A\u109B\u1CE1\u1CF7\uAA7B\uABEC\U0001D165\U0001D166\U0001D16D-\U0001D172][[:gc=mc:]&[:ccc=9:][\u302E\u302F]]] +Let $AlphaExclusions := [ + # TODO(egg): Make this list legible. + [\uAA7D \u0F3E\u0F3F\u1063\u1064\u1069-\u106D\u1087-\u108C\u108F\u109A\u109B\u1CE1\u1CF7\uAA7B\uABEC\U0001D165\U0001D166\U0001D16D-\U0001D172] + [[:gc=mc:]&[:ccc=9:][\u302E\u302F]] + \p{Name=/^MUSICAL SYMBOL COMBINING FLAG-/} +] # 6.1.0 Added HANGUL SINGLE DOT TONE MARK..HANGUL DOUBLE DOT TONE MARK # 7.0 Added AA7D # 10.0 Added 1CF7 (similar to 1CE1) From e34ee587a8b36bb1a7ec65eb21b049721c6b4fe4 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Thu, 25 Jul 2024 17:55:48 +0200 Subject: [PATCH 10/14] Regenerate UCD --- unicodetools/data/ucd/dev/DerivedCoreProperties.txt | 11 ++++++----- unicodetools/data/ucd/dev/PropList.txt | 6 +++--- .../data/ucd/dev/auxiliary/GraphemeBreakProperty.txt | 8 ++++---- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt index 6806d4a02..558829065 100644 --- a/unicodetools/data/ucd/dev/DerivedCoreProperties.txt +++ b/unicodetools/data/ucd/dev/DerivedCoreProperties.txt @@ -1,5 +1,5 @@ # DerivedCoreProperties-16.0.0.txt -# Date: 2024-07-25, 15:19:41 GMT +# Date: 2024-07-25, 15:54:00 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -11009,6 +11009,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. 1D185..1D18B ; Grapheme_Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD ; Grapheme_Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO 1D242..1D244 ; Grapheme_Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D250..1D252 ; Grapheme_Extend # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 1DA00..1DA36 ; Grapheme_Extend # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN 1DA3B..1DA6C ; Grapheme_Extend # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT 1DA75 ; Grapheme_Extend # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS @@ -11031,7 +11032,7 @@ FF9E..FF9F ; Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK. E0020..E007F ; Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Grapheme_Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2193 +# Total code points: 2196 # ================================================ @@ -12640,7 +12641,6 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 1D1AE..1D1EA ; Grapheme_Base # So [61] MUSICAL SYMBOL PEDAL MARK..MUSICAL SYMBOL KORON 1D200..1D241 ; Grapheme_Base # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 1D245 ; Grapheme_Base # So GREEK MUSICAL LEIMMA -1D250..1D252 ; Grapheme_Base # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 1D253..1D255 ; Grapheme_Base # So [3] MUSICAL SYMBOL TWO HUNDRED FIFTY-SIXTH REST..MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST 1D2C0..1D2D3 ; Grapheme_Base # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN 1D2E0..1D2F3 ; Grapheme_Base # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN @@ -12816,7 +12816,7 @@ FFFC..FFFD ; Grapheme_Base # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEME 30000..3134A ; Grapheme_Base # Lo [4939] CJK UNIFIED IDEOGRAPH-30000..CJK UNIFIED IDEOGRAPH-3134A 31350..323AF ; Grapheme_Base # Lo [4192] CJK UNIFIED IDEOGRAPH-31350..CJK UNIFIED IDEOGRAPH-323AF -# Total code points: 152736 +# Total code points: 152733 # ================================================ @@ -13338,6 +13338,7 @@ FF9E..FF9F ; InCB; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HA 1D185..1D18B ; InCB; Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD ; InCB; Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO 1D242..1D244 ; InCB; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D250..1D252 ; InCB; Extend # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 1DA00..1DA36 ; InCB; Extend # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN 1DA3B..1DA6C ; InCB; Extend # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT 1DA75 ; InCB; Extend # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS @@ -13361,6 +13362,6 @@ FF9E..FF9F ; InCB; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HA E0020..E007F ; InCB; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; InCB; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2192 +# Total code points: 2195 # EOF diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index bbf3e48a5..e91bf1131 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,6 +1,5 @@ -1D250..1D252; Other_Grapheme_Extend # PropList-16.0.0.txt -# Date: 2024-05-31, 18:09:48 GMT +# Date: 2024-07-25, 15:54:40 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1316,9 +1315,10 @@ FF9E..FF9F ; Other_Grapheme_Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND 16FF0..16FF1 ; Other_Grapheme_Extend # Mc [2] VIETNAMESE ALTERNATE READING MARK CA..VIETNAMESE ALTERNATE READING MARK NHAY 1D165..1D166 ; Other_Grapheme_Extend # Mc [2] MUSICAL SYMBOL COMBINING STEM..MUSICAL SYMBOL COMBINING SPRECHGESANG STEM 1D16D..1D172 ; Other_Grapheme_Extend # Mc [6] MUSICAL SYMBOL COMBINING AUGMENTATION DOT..MUSICAL SYMBOL COMBINING FLAG-5 +1D250..1D252 ; Other_Grapheme_Extend # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 E0020..E007F ; Other_Grapheme_Extend # Cf [96] TAG SPACE..CANCEL TAG -# Total code points: 160 +# Total code points: 163 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt index fb56b975c..e62b52459 100644 --- a/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/GraphemeBreakProperty.txt @@ -1,5 +1,5 @@ # GraphemeBreakProperty-16.0.0.txt -# Date: 2024-07-25, 15:19:48 GMT +# Date: 2024-07-25, 15:54:14 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -472,6 +472,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT 1D185..1D18B ; Extend # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD ; Extend # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO 1D242..1D244 ; Extend # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME +1D250..1D252 ; Extend # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 1DA00..1DA36 ; Extend # Mn [55] SIGNWRITING HEAD RIM..SIGNWRITING AIR SUCKING IN 1DA3B..1DA6C ; Extend # Mn [50] SIGNWRITING MOUTH CLOSED NEUTRAL..SIGNWRITING EXCITEMENT 1DA75 ; Extend # Mn SIGNWRITING UPPER BODY TILTING FROM HIP JOINTS @@ -495,7 +496,7 @@ FF9E..FF9F ; Extend # Lm [2] HALFWIDTH KATAKANA VOICED SOUND MARK..HALFWIDT E0020..E007F ; Extend # Cf [96] TAG SPACE..CANCEL TAG E0100..E01EF ; Extend # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 -# Total code points: 2198 +# Total code points: 2201 # ================================================ @@ -660,9 +661,8 @@ ABEC ; SpacingMark # Mc MEETEI MAYEK LUM IYEK 11F3E..11F3F ; SpacingMark # Mc [2] KAWI VOWEL SIGN E..KAWI VOWEL SIGN AI 1612A..1612C ; SpacingMark # Mc [3] GURUNG KHEMA CONSONANT SIGN MEDIAL YA..GURUNG KHEMA CONSONANT SIGN MEDIAL HA 16F51..16F87 ; SpacingMark # Mc [55] MIAO SIGN ASPIRATION..MIAO VOWEL SIGN UI -1D250..1D252 ; SpacingMark # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 -# Total code points: 381 +# Total code points: 378 # ================================================ From f94a4af01211bcbaa122446eac048790bd179ae2 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 7 Aug 2024 19:10:01 +0200 Subject: [PATCH 11/14] Tests that find issues --- .../org/unicode/text/UCD/AdditionComparisons.txt | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt index 9db1fdf1d..6a06d82b5 100644 --- a/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt +++ b/unicodetools/src/main/resources/org/unicode/text/UCD/AdditionComparisons.txt @@ -35,7 +35,19 @@ CorrespondTo [\N{OLD HUNGARIAN SMALL LETTER A}] ## Provisionally assigned. [placeholder for draft PRs] # Ignore the security and IDNA properties, as these are not yet included for provisionally assigned characters. -Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status: +Ignoring Confusable_MA Identifier_Status Identifier_Type Idn_Status Idn_2008: + +Ignoring Block: +Propertywise [\N{MUSICAL SYMBOL COMBINING FLAG-5} + \N{MUSICAL SYMBOL COMBINING FLAG-6} + \N{MUSICAL SYMBOL COMBINING FLAG-7} + \N{MUSICAL SYMBOL COMBINING FLAG-8}] AreAlike + +Propertywise [𝅂 + \N{MUSICAL SYMBOL TWO HUNDRED FIFTY-SIXTH REST} + \N{MUSICAL SYMBOL FIVE HUNDRED TWELFTH REST} + \N{MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST}] AreAlike +end Ignoring; end Ignoring; From 5c0cb7e3c30263b6515d50f91ee8f3ace82f0b86 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 7 Aug 2024 19:11:06 +0200 Subject: [PATCH 12/14] vo=U --- unicodetools/data/ucd/dev/VerticalOrientation.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/unicodetools/data/ucd/dev/VerticalOrientation.txt b/unicodetools/data/ucd/dev/VerticalOrientation.txt index 35b86d140..e8877ab93 100644 --- a/unicodetools/data/ucd/dev/VerticalOrientation.txt +++ b/unicodetools/data/ucd/dev/VerticalOrientation.txt @@ -2254,8 +2254,8 @@ FFFC..FFFD ; U # So [2] OBJECT REPLACEMENT CHARACTER..REPLACEMENT CHARA 1D200..1D241 ; R # So [66] GREEK VOCAL NOTATION SYMBOL-1..GREEK INSTRUMENTAL NOTATION SYMBOL-54 1D242..1D244 ; R # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME 1D245 ; R # So GREEK MUSICAL LEIMMA -1D250..1D252 ; R # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 -1D253..1D255 ; R # So [3] MUSICAL SYMBOL TWO HUNDRED FIFTY-SIXTH REST..MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST +1D250..1D252 ; U # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 +1D253..1D255 ; U # So [3] MUSICAL SYMBOL TWO HUNDRED FIFTY-SIXTH REST..MUSICAL SYMBOL ONE THOUSAND TWENTY-FOURTH REST 1D2C0..1D2D3 ; R # No [20] KAKTOVIK NUMERAL ZERO..KAKTOVIK NUMERAL NINETEEN 1D2E0..1D2F3 ; U # No [20] MAYAN NUMERAL ZERO..MAYAN NUMERAL NINETEEN 1D2F4..1D2FF ; U # Cn [12] .. From be716aa3758ae3a606f20897d40738e6a42e5eb3 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 7 Aug 2024 19:12:23 +0200 Subject: [PATCH 13/14] Diacritic for flags --- unicodetools/data/ucd/dev/PropList.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index e91bf1131..ee050fdd0 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,3 +1,4 @@ +1D250..1D252; Diacritic # PropList-16.0.0.txt # Date: 2024-07-25, 15:54:40 GMT # © 2024 Unicode®, Inc. From fa424e43ffa293c4b701af3c767a0d325e1452d9 Mon Sep 17 00:00:00 2001 From: Robin Leroy Date: Wed, 7 Aug 2024 19:15:02 +0200 Subject: [PATCH 14/14] Regenerate UCD --- unicodetools/data/ucd/dev/PropList.txt | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index ee050fdd0..a3841070c 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,6 +1,5 @@ -1D250..1D252; Diacritic # PropList-16.0.0.txt -# Date: 2024-07-25, 15:54:40 GMT +# Date: 2024-08-07, 17:13:53 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -1142,6 +1141,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1D17B..1D182 ; Diacritic # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE 1D185..1D18B ; Diacritic # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE 1D1AA..1D1AD ; Diacritic # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO +1D250..1D252 ; Diacritic # Mc [3] MUSICAL SYMBOL COMBINING FLAG-6..MUSICAL SYMBOL COMBINING FLAG-8 1E030..1E06D ; Diacritic # Lm [62] MODIFIER LETTER CYRILLIC SMALL A..MODIFIER LETTER CYRILLIC SMALL STRAIGHT U WITH STROKE 1E130..1E136 ; Diacritic # Mn [7] NYIAKENG PUACHUE HMONG TONE-B..NYIAKENG PUACHUE HMONG TONE-D 1E2AE ; Diacritic # Mn TOTO SIGN RISING TONE @@ -1151,7 +1151,7 @@ FFE3 ; Diacritic # Sk FULLWIDTH MACRON 1E944..1E946 ; Diacritic # Mn [3] ADLAM ALIF LENGTHENER..ADLAM GEMINATION MARK 1E948..1E94A ; Diacritic # Mn [3] ADLAM CONSONANT MODIFIER..ADLAM NUKTA -# Total code points: 1178 +# Total code points: 1181 # ================================================