diff --git a/unicodetools/data/ucd/dev/PropList.txt b/unicodetools/data/ucd/dev/PropList.txt index 73a856768..395aa448d 100644 --- a/unicodetools/data/ucd/dev/PropList.txt +++ b/unicodetools/data/ucd/dev/PropList.txt @@ -1,5 +1,5 @@ # PropList-16.0.0.txt -# Date: 2024-05-06, 12:17:26 GMT +# Date: 2024-05-08, 03:40:06 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -160,6 +160,7 @@ FF63 ; Quotation_Mark # Pe HALFWIDTH RIGHT CORNER BRACKET 2024 ; Terminal_Punctuation # Po ONE DOT LEADER 203C..203D ; Terminal_Punctuation # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG 2047..2049 ; Terminal_Punctuation # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +2CF9..2CFB ; Terminal_Punctuation # Po [3] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN INDIRECT QUESTION MARK 2E2E ; Terminal_Punctuation # Po REVERSED QUESTION MARK 2E3C ; Terminal_Punctuation # Po STENOGRAPHIC FULL STOP 2E41 ; Terminal_Punctuation # Po REVERSED COMMA @@ -178,6 +179,8 @@ AA5D..AA5F ; Terminal_Punctuation # Po [3] CHAM PUNCTUATION DANDA..CHAM PUN AADF ; Terminal_Punctuation # Po TAI VIET SYMBOL KOI KOI AAF0..AAF1 ; Terminal_Punctuation # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM ABEB ; Terminal_Punctuation # Po MEETEI MAYEK CHEIKHEI +FE12 ; Terminal_Punctuation # Po PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP +FE15..FE16 ; Terminal_Punctuation # Po [2] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK..PRESENTATION FORM FOR VERTICAL QUESTION MARK FE50..FE52 ; Terminal_Punctuation # Po [3] SMALL COMMA..SMALL FULL STOP FE54..FE57 ; Terminal_Punctuation # Po [4] SMALL SEMICOLON..SMALL EXCLAMATION MARK FF01 ; Terminal_Punctuation # Po FULLWIDTH EXCLAMATION MARK @@ -231,7 +234,7 @@ FF64 ; Terminal_Punctuation # Po HALFWIDTH IDEOGRAPHIC COMMA 1BC9F ; Terminal_Punctuation # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA87..1DA8A ; Terminal_Punctuation # Po [4] SIGNWRITING COMMA..SIGNWRITING COLON -# Total code points: 285 +# Total code points: 291 # ================================================ @@ -1543,6 +1546,7 @@ FF65 ; Other_ID_Continue # Po HALFWIDTH KATAKANA MIDDLE DOT 2024 ; Sentence_Terminal # Po ONE DOT LEADER 203C..203D ; Sentence_Terminal # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG 2047..2049 ; Sentence_Terminal # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +2CF9..2CFB ; Sentence_Terminal # Po [3] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN INDIRECT QUESTION MARK 2E2E ; Sentence_Terminal # Po REVERSED QUESTION MARK 2E3C ; Sentence_Terminal # Po STENOGRAPHIC FULL STOP 2E53..2E54 ; Sentence_Terminal # Po [2] MEDIEVAL EXCLAMATION MARK..MEDIEVAL QUESTION MARK @@ -1558,6 +1562,8 @@ A9C8..A9C9 ; Sentence_Terminal # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA AA5D..AA5F ; Sentence_Terminal # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA AAF0..AAF1 ; Sentence_Terminal # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM ABEB ; Sentence_Terminal # Po MEETEI MAYEK CHEIKHEI +FE12 ; Sentence_Terminal # Po PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP +FE15..FE16 ; Sentence_Terminal # Po [2] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK..PRESENTATION FORM FOR VERTICAL QUESTION MARK FE52 ; Sentence_Terminal # Po SMALL FULL STOP FE56..FE57 ; Sentence_Terminal # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK FF01 ; Sentence_Terminal # Po FULLWIDTH EXCLAMATION MARK @@ -1598,7 +1604,7 @@ FF61 ; Sentence_Terminal # Po HALFWIDTH IDEOGRAPHIC FULL STOP 1BC9F ; Sentence_Terminal # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA88 ; Sentence_Terminal # Po SIGNWRITING FULL STOP -# Total code points: 164 +# Total code points: 170 # ================================================ diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index 7f0c90d8f..c2948f09f 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2024-05-06, 12:18:03 GMT +# Date: 2024-05-08, 04:00:01 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. # For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2710,6 +2710,7 @@ FF0E ; ATerm # Po FULLWIDTH FULL STOP 1C7E..1C7F ; STerm # Po [2] OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD 203C..203D ; STerm # Po [2] DOUBLE EXCLAMATION MARK..INTERROBANG 2047..2049 ; STerm # Po [3] DOUBLE QUESTION MARK..EXCLAMATION QUESTION MARK +2CF9..2CFB ; STerm # Po [3] COPTIC OLD NUBIAN FULL STOP..COPTIC OLD NUBIAN INDIRECT QUESTION MARK 2E2E ; STerm # Po REVERSED QUESTION MARK 2E3C ; STerm # Po STENOGRAPHIC FULL STOP 2E53..2E54 ; STerm # Po [2] MEDIEVAL EXCLAMATION MARK..MEDIEVAL QUESTION MARK @@ -2725,6 +2726,8 @@ A9C8..A9C9 ; STerm # Po [2] JAVANESE PADA LINGSA..JAVANESE PADA LUNGSI AA5D..AA5F ; STerm # Po [3] CHAM PUNCTUATION DANDA..CHAM PUNCTUATION TRIPLE DANDA AAF0..AAF1 ; STerm # Po [2] MEETEI MAYEK CHEIKHAN..MEETEI MAYEK AHANG KHUDAM ABEB ; STerm # Po MEETEI MAYEK CHEIKHEI +FE12 ; STerm # Po PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC FULL STOP +FE15..FE16 ; STerm # Po [2] PRESENTATION FORM FOR VERTICAL EXCLAMATION MARK..PRESENTATION FORM FOR VERTICAL QUESTION MARK FE56..FE57 ; STerm # Po [2] SMALL QUESTION MARK..SMALL EXCLAMATION MARK FF01 ; STerm # Po FULLWIDTH EXCLAMATION MARK FF1F ; STerm # Po FULLWIDTH QUESTION MARK @@ -2763,7 +2766,7 @@ FF61 ; STerm # Po HALFWIDTH IDEOGRAPHIC FULL STOP 1BC9F ; STerm # Po DUPLOYAN PUNCTUATION CHINOOK FULL STOP 1DA88 ; STerm # Po SIGNWRITING FULL STOP -# Total code points: 160 +# Total code points: 166 # ================================================ @@ -2957,26 +2960,33 @@ FF63 ; Close # Pe HALFWIDTH RIGHT CORNER BRACKET 002C ; SContinue # Po COMMA 002D ; SContinue # Pd HYPHEN-MINUS -003A ; SContinue # Po COLON +003A..003B ; SContinue # Po [2] COLON..SEMICOLON +037E ; SContinue # Po GREEK QUESTION MARK 055D ; SContinue # Po ARMENIAN COMMA 060C..060D ; SContinue # Po [2] ARABIC COMMA..ARABIC DATE SEPARATOR +061B ; SContinue # Po ARABIC SEMICOLON 07F8 ; SContinue # Po NKO COMMA +1364 ; SContinue # Po ETHIOPIC SEMICOLON 1802 ; SContinue # Po MONGOLIAN COMMA 1808 ; SContinue # Po MONGOLIAN MANCHU COMMA 2013..2014 ; SContinue # Pd [2] EN DASH..EM DASH +204F ; SContinue # Po REVERSED SEMICOLON +2E35 ; SContinue # Po TURNED SEMICOLON 3001 ; SContinue # Po IDEOGRAPHIC COMMA +A6F6 ; SContinue # Po BAMUM SEMICOLON FE10..FE11 ; SContinue # Po [2] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA -FE13 ; SContinue # Po PRESENTATION FORM FOR VERTICAL COLON +FE13..FE14 ; SContinue # Po [2] PRESENTATION FORM FOR VERTICAL COLON..PRESENTATION FORM FOR VERTICAL SEMICOLON FE31..FE32 ; SContinue # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH FE50..FE51 ; SContinue # Po [2] SMALL COMMA..SMALL IDEOGRAPHIC COMMA -FE55 ; SContinue # Po SMALL COLON +FE54..FE55 ; SContinue # Po [2] SMALL SEMICOLON..SMALL COLON FE58 ; SContinue # Pd SMALL EM DASH FE63 ; SContinue # Pd SMALL HYPHEN-MINUS FF0C ; SContinue # Po FULLWIDTH COMMA FF0D ; SContinue # Pd FULLWIDTH HYPHEN-MINUS -FF1A ; SContinue # Po FULLWIDTH COLON +FF1A..FF1B ; SContinue # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON FF64 ; SContinue # Po HALFWIDTH IDEOGRAPHIC COMMA +1DA89 ; SContinue # Po SIGNWRITING SEMICOLON -# Total code points: 26 +# Total code points: 37 # EOF diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java b/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java index fbca38076..8076978f6 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java @@ -1464,6 +1464,7 @@ public int getMaxWidth(boolean isShort) { unicodeMap.putAll( getProperty("STerm") .getSet(UCD_Names.YES) + .addAll(new UnicodeSet("[\\u2CF9\\u2CFA\\u2CFB\\uFE12\\uFE15\\uFE16]")) .removeAll(unicodeMap.keySet("ATerm")), "STerm"); unicodeMap.putAll( @@ -1476,21 +1477,32 @@ public int getMaxWidth(boolean isShort) { "Close"); unicodeMap.putAll( new UnicodeSet( - "[\\u002C\\u3001\\uFE10\\uFE11\\uFF0C" - + "\\uFE50\\uFF64\\uFE51\\uFE51\\u055D\\u060C\\u060D\\u07F8\\u1802\\u1808" - + // new - // from - // L2/08-029 - "\\u003A\\uFE13\\uFF1A" - + "\\uFE55" - + // new from L2/08-029 - // "\\u003B\\uFE14\\uFF1B" + - "\\u2014\\uFE31\\u002D\\uFF0D" - + "\\u2013\\uFE32\\uFE58\\uFE63" - + // new - // from - // L2/08-029 - "]"), + "[\\u002C\\u3001\\uFE10\\uFE11\\uFF0C" + + "\\uFE50\\uFF64\\uFE51\\uFE51\\u055D\\u060C\\u060D\\u07F8\\u1802\\u1808" + + // new + // from + // L2/08-029 + "\\u003A\\uFE13\\uFF1A" + + "\\uFE55" + + // new from L2/08-029 + // "\\u003B\\uFE14\\uFF1B" + + "\\u2014\\uFE31\\u002D\\uFF0D" + + "\\u2013\\uFE32\\uFE58\\uFE63" + + // new + // from + // L2/08-029 + "]") + .add(0x003B) + .add(0x037E) + .add(0x061B) + .add(0x1364) + .add(0x204F) + .add(0x2E35) + .add(0xA6F6) + .add(0xFE14) + .add(0xFE54) + .add(0xFF1B) + .add(0x1DA89), "SContinue"); // unicodeMap.putAll(graphemeExtend, "Other"); // to verify that none // of the above touch it.