From a8e8f5e1ffdfab4b561515005f8d0f8941a07a5b Mon Sep 17 00:00:00 2001 From: Josh Hadley <51727949+josh-hadley@users.noreply.github.com> Date: Wed, 15 May 2024 17:27:23 -0700 Subject: [PATCH] Semicolons; revisited (#819) Reduce the set of semicolons in SContinue to include only U+003B and its compatibility equivalents and regenerate SentenceBreakProperty.txt. Spurred by https://www.unicode.org/review/pri494/feedback.html#ID20240509103608 and discussion amongst PAG members. --- .../data/ucd/dev/auxiliary/SentenceBreakProperty.txt | 10 ++-------- .../unicode/text/UCD/ToolUnicodePropertySource.java | 8 +------- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt index c2948f09f..39fdb57c0 100644 --- a/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt +++ b/unicodetools/data/ucd/dev/auxiliary/SentenceBreakProperty.txt @@ -1,5 +1,5 @@ # SentenceBreakProperty-16.0.0.txt -# Date: 2024-05-08, 04:00:01 GMT +# Date: 2024-05-13, 20:53:44 GMT # © 2024 Unicode®, Inc. # Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries. 
# For terms of use and license, see https://www.unicode.org/terms_of_use.html @@ -2964,16 +2964,11 @@ FF63 ; Close # Pe HALFWIDTH RIGHT CORNER BRACKET 037E ; SContinue # Po GREEK QUESTION MARK 055D ; SContinue # Po ARMENIAN COMMA 060C..060D ; SContinue # Po [2] ARABIC COMMA..ARABIC DATE SEPARATOR -061B ; SContinue # Po ARABIC SEMICOLON 07F8 ; SContinue # Po NKO COMMA -1364 ; SContinue # Po ETHIOPIC SEMICOLON 1802 ; SContinue # Po MONGOLIAN COMMA 1808 ; SContinue # Po MONGOLIAN MANCHU COMMA 2013..2014 ; SContinue # Pd [2] EN DASH..EM DASH -204F ; SContinue # Po REVERSED SEMICOLON -2E35 ; SContinue # Po TURNED SEMICOLON 3001 ; SContinue # Po IDEOGRAPHIC COMMA -A6F6 ; SContinue # Po BAMUM SEMICOLON FE10..FE11 ; SContinue # Po [2] PRESENTATION FORM FOR VERTICAL COMMA..PRESENTATION FORM FOR VERTICAL IDEOGRAPHIC COMMA FE13..FE14 ; SContinue # Po [2] PRESENTATION FORM FOR VERTICAL COLON..PRESENTATION FORM FOR VERTICAL SEMICOLON FE31..FE32 ; SContinue # Pd [2] PRESENTATION FORM FOR VERTICAL EM DASH..PRESENTATION FORM FOR VERTICAL EN DASH @@ -2985,8 +2980,7 @@ FF0C ; SContinue # Po FULLWIDTH COMMA FF0D ; SContinue # Pd FULLWIDTH HYPHEN-MINUS FF1A..FF1B ; SContinue # Po [2] FULLWIDTH COLON..FULLWIDTH SEMICOLON FF64 ; SContinue # Po HALFWIDTH IDEOGRAPHIC COMMA -1DA89 ; SContinue # Po SIGNWRITING SEMICOLON -# Total code points: 37 +# Total code points: 31 # EOF diff --git a/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java b/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java index 8076978f6..c45a9be94 100644 --- a/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java +++ b/unicodetools/src/main/java/org/unicode/text/UCD/ToolUnicodePropertySource.java @@ -1494,15 +1494,9 @@ public int getMaxWidth(boolean isShort) { "]") .add(0x003B) .add(0x037E) - .add(0x061B) - .add(0x1364) - .add(0x204F) - .add(0x2E35) - .add(0xA6F6) .add(0xFE14) .add(0xFE54) - .add(0xFF1B) - .add(0x1DA89), + .add(0xFF1B), 
"SContinue"); // unicodeMap.putAll(graphemeExtend, "Other"); // to verify that none // of the above touch it.