diff --git a/common/main/fr.xml b/common/main/fr.xml index 51b9019c681..8508a118698 100644 --- a/common/main/fr.xml +++ b/common/main/fr.xml @@ -11702,7 +11702,7 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ ↑↑↑ {0} kcal - {0}  kcal + {0} kcal ↑↑↑ diff --git a/common/main/hi_Latn.xml b/common/main/hi_Latn.xml index 295c7060347..219942bfcb1 100644 --- a/common/main/hi_Latn.xml +++ b/common/main/hi_Latn.xml @@ -2211,14 +2211,14 @@ annotations. G y MMM d–d G y MMM d – G y MMM d - G y MMM d  – MMM d - G y MMM d –  y d MMM + G y MMM d – MMM d + G y MMM d – y d MMM G y MMM d, E – MMM d, E - G y MMM d, E –  G y MMM d, E - G y MMM d, E –  MMM d, E - G y MMM d, E –  y MMM d, E + G y MMM d, E – G y MMM d, E + G y MMM d, E – MMM d, E + G y MMM d, E – y MMM d, E ↑↑↑ diff --git a/common/main/ku.xml b/common/main/ku.xml index 4fd6e802a2e..2c95822fe33 100644 --- a/common/main/ku.xml +++ b/common/main/ku.xml @@ -1186,9 +1186,9 @@ CLDR data files are interpreted according to the LDML specification (http://unic G d'ê' MMM'a' y'an' – d'ê' MMM'a' y'an' - G d'ê' MMM'a' y'an' E  – d'ê' MMM'a' y'an' E - G d'ê' MMM'a' y'an' E  – d'ê' MMM'a' y'an' E - G d'ê' MMM'a' y'an' E  – d'ê' MMM'a' y'an' E + G d'ê' MMM'a' y'an' E – d'ê' MMM'a' y'an' E + G d'ê' MMM'a' y'an' E – d'ê' MMM'a' y'an' E + G d'ê' MMM'a' y'an' E – d'ê' MMM'a' y'an' E G MMMM – MMMM y @@ -1722,9 +1722,9 @@ CLDR data files are interpreted according to the LDML specification (http://unic d'ê' MMM'a' y'an' – d'ê' MMM'a' y'an' - d'ê' MMM'a' y'an' E  – d'ê' MMM'a' y'an' E - d'ê' MMM'a' y'an' E  – d'ê' MMM'a' y'an' E - d'ê' MMM'a' y'an' E  – d'ê' MMM'a' y'an' E + d'ê' MMM'a' y'an' E – d'ê' MMM'a' y'an' E + d'ê' MMM'a' y'an' E – d'ê' MMM'a' y'an' E + d'ê' MMM'a' y'an' E – d'ê' MMM'a' y'an' E MMMM – MMMM y diff --git a/common/main/pap.xml b/common/main/pap.xml index 5c8190cf070..8ab1eec7e3b 100644 --- a/common/main/pap.xml +++ b/common/main/pap.xml @@ -388,7 +388,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic E, d MMM – d MMM y G - E, d MMM y G  – E, d MMM y G + E, d MMM y G – E, d MMM y G E, d MMM – E, d MMM y G E, d MMM y – E, d MMM y G @@ -905,7 +905,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic d–d MMM - d MMM  – d MMM + d MMM – d MMM E, d MMM – E, d MMM diff --git a/common/main/root.xml b/common/main/root.xml index 56cf38d18d4..d1897cf7317 100644 --- a/common/main/root.xml +++ b/common/main/root.xml @@ -2907,6 +2907,9 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ St. John’s + + Curaçao + Asmara @@ -2955,9 +2958,18 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ Kathmandu + + Asunción + + + Réunion + St. Helena + + São Tomé + Lower Prince’s Quarter @@ -3006,18 +3018,6 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ Ho Chi Minh - - Curaçao - - - Asunción - - - Réunion - - - São Tomé - diff --git a/common/main/syr.xml b/common/main/syr.xml index b69844febb8..50c642eb6cd 100644 --- a/common/main/syr.xml +++ b/common/main/syr.xml @@ -1200,7 +1200,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic ↑↑↑ - {0}  –  {1} + {0} – {1} ↑↑↑ ↑↑↑ diff --git a/common/main/vi.xml b/common/main/vi.xml index 7030188decc..d8eaffc37c5 100644 --- a/common/main/vi.xml +++ b/common/main/vi.xml @@ -2523,15 +2523,15 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ d/M/y – d/M/y GGGGG - d/M/y GGGGG  –  d/M/y GGGGG + d/M/y GGGGG – d/M/y GGGGG d/M/y – d/M/y GGGGG d/M/y – d/M/y GGGGG - E, d/M/y  –  E, d/M/y GGGGG - E, d/M/y GGGGG  –  E, d/M/y GGGGG - E, d/M/y  –  E, d/M/y GGGGG - E, d/M/y  –  E, d/M/y GGGGG + E, d/M/y – E, d/M/y GGGGG + E, d/M/y GGGGG – E, d/M/y GGGGG + E, d/M/y – E, d/M/y GGGGG + E, d/M/y – E, d/M/y GGGGG MMM y G – MMM y G @@ -2590,19 +2590,19 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ d/M – d/M - E, d/M  –  E, d/M - E, d/M  –  E, d/M + E, d/M – E, d/M + E, d/M – E, d/M - MMM  –  MMM + MMM – MMM d – d MMM - d MMM  –  d MMM + d MMM – d MMM - E, d MMM  –  E, d MMM - E, d MMM  –  E, d MMM + E, d MMM – E, d MMM + E, d MMM – E, d MMM y–y G @@ -2617,26 +2617,26 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ d/M/y – d/M/y GGGGG - E, d/M/y  –  E, d/M/y GGGGG - E, d/M/y  –  E, d/M/y GGGGG - E, d/M/y  –  E, d/M/y GGGGG + E, d/M/y – E, d/M/y GGGGG + E, d/M/y – E, d/M/y GGGGG + E, d/M/y – E, d/M/y GGGGG - MMM  –  MMM y G - MMM y  –  MMM y G + MMM – MMM y G + MMM y – MMM y G d – d MMM, y G - d MMM  –  d MMM, y G - d MMM, y  –  d MMM, y G + d MMM – d MMM, y G + d MMM, y – d MMM, y G - E, d MMM  –  E, d MMM, y G - E, d MMM  –  E, d MMM, y G - E, d MMM, y  –  E, d MMM, y G + E, d MMM – E, d MMM, y G + E, d MMM – E, d MMM, y G + E, d MMM, y – E, d MMM, y G - MMMM  –  MMMM y G + MMMM – MMMM y G MMMM y – MMMM y G @@ -3081,7 +3081,7 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ ↑↑↑ - h 'giờ' B  –  h 'giờ' B + h 'giờ' B – h 'giờ' B h – h 'giờ' B @@ -3097,24 +3097,24 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ y – y G - M/y G  –  M/y G + M/y G – M/y G M/y– M/y G M/y – M/y G d/M/y – d/M/y G - d/M/y G  –  d/M/y G + d/M/y G – d/M/y G d/M/y – d/M/y G d/M/y – d/M/y G - E, d/M/y  –  E, d/M/y G - E, d/M/y G  –  E, d/M/y G - E, d/M/y  –  E, d/M/y G - E, d/M/y  –  E, d/M/y G + E, d/M/y – E, d/M/y G + E, d/M/y G – E, d/M/y G + E, d/M/y – E, d/M/y G + E, d/M/y – E, d/M/y G - MMM y G  –  MMM y G + MMM y G – MMM y G MMM – MMM y G MMM y – MMM y G @@ -3170,15 +3170,15 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ d/M – d/M - E, d/M  –  E, d/M - E, d/M  –  E, d/M + E, d/M – E, d/M + E, d/M – E, d/M MMM – MMM d – d MMM - d MMM  –  d MMM + d MMM – d MMM E, d MMM – E, d MMM @@ -3197,27 +3197,27 @@ Warnings: All cp values have U+FE0F characters removed. See /annotationsDerived/ d/M/y – d/M/y - E, d/M/y  –  E, d/M/y - E, d/M/y  –  E, d/M/y - E, d/M/y  –  E, d/M/y + E, d/M/y – E, d/M/y + E, d/M/y – E, d/M/y + E, d/M/y – E, d/M/y - MMM  –  MMM y - MMM y  –  MMM y + MMM – MMM y + MMM y – MMM y d – d MMM, y - d MMM  –  d MMM, y - d MMM, y  –  d MMM, y + d MMM – d MMM, y + d MMM, y – d MMM, y - E, d MMM  –  E, d MMM, y - E, d MMM  –  E, d MMM, y - E, d MMM, y  –  E, d MMM, y + E, d MMM – E, d MMM, y + E, d MMM – E, d MMM, y + E, d MMM, y – E, d MMM, y - MMMM  –  MMMM 'năm' y - MMMM 'năm' y  –  MMMM 'năm' y + MMMM – MMMM 'năm' y + MMMM 'năm' y – MMMM 'năm' y diff --git a/tools/cldr-code/src/main/java/org/unicode/cldr/test/DisplayAndInputProcessor.java b/tools/cldr-code/src/main/java/org/unicode/cldr/test/DisplayAndInputProcessor.java index 3145f05cd66..e9e980de343 100644 --- a/tools/cldr-code/src/main/java/org/unicode/cldr/test/DisplayAndInputProcessor.java +++ b/tools/cldr-code/src/main/java/org/unicode/cldr/test/DisplayAndInputProcessor.java @@ -155,6 +155,16 @@ public class DisplayAndInputProcessor { // private static final Pattern SPACE_PLUS_NBSP_TO_NORMALIZE = // PatternCache.get("\\u0020+[\\u00A0\\u202F]+"); + // NNBSP 202F among other horizontal spaces (includes 0020, 00A0, 2009, 202F, etc.) + private static final Pattern NNBSP_AMONG_OTHER_SPACES = + PatternCache.get("[\\h&&[^\\u202F]]+\\u202F\\h*|\\u202F\\h+"); + // NBSP 00A0 among other horizontal spaces + private static final Pattern NBSP_AMONG_OTHER_SPACES = + PatternCache.get("[\\h&&[^\\u00A0]]+\\u00A0\\h*|\\u00A0\\h+"); + // THIN SPACE 2009 among other horizontal spaces + private static final Pattern THIN_SPACE_AMONG_OTHER_SPACES = + PatternCache.get("[\\h&&[^\\u2009]]+\\u2009\\h*|\\u2009\\h+"); + private static final Pattern INITIAL_NBSP = PatternCache.get("^[\\u00A0\\u202F]+"); private static final Pattern FINAL_NBSP = PatternCache.get("[\\u00A0\\u202F]+$"); @@ -1289,6 +1299,14 @@ private String normalizeWhitespace(String path, String value) { value = PLACEHOLDER_SPACE_BEFORE.matcher(value).replaceAll("\u00A0{"); } + // Finally, replace remaining space combinations with most restrictive type CLDR-17233 + // If we have NNBSP U+202F in combination with other spaces, keep just it + value = NNBSP_AMONG_OTHER_SPACES.matcher(value).replaceAll("\u202F"); + // Else if we have NBSP U+00A0 in combination with other spaces, keep just it + value = NBSP_AMONG_OTHER_SPACES.matcher(value).replaceAll("\u00A0"); + // Else if we have THIN SPACE U+2009 in combination with other spaces, keep just it + value = THIN_SPACE_AMONG_OTHER_SPACES.matcher(value).replaceAll("\u2009"); + return value; } diff --git a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDisplayAndInputProcessor.java b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDisplayAndInputProcessor.java index 625da1d47a3..caecc91c0fc 100644 --- a/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDisplayAndInputProcessor.java +++ b/tools/cldr-code/src/test/java/org/unicode/cldr/unittest/TestDisplayAndInputProcessor.java @@ -687,6 +687,17 @@ class PathSpaceAdjustData { "//ldml/units/unitLength[@type=\"short\"]unit[@type=\"mass-gram\"]/unitPattern[@count=\"other\"]", "g {0}", "g\u00A0{0}"), + // tests for CLDR-17233 + new PathSpaceAdjustData( + "es", + "//ldml/dates/calendars/calendar[@type=\"gregorian\"]/dayPeriods/dayPeriodContext[@type=\"format\"]/dayPeriodWidth[@type=\"abbreviated\"]/dayPeriod[@type=\"am\"]", + "a. \u202Fm.", + "a.\u202Fm."), + new PathSpaceAdjustData( + "vi", + "//ldml/dates/calendars/calendar[@type=\"generic\"]/dateTimeFormats/intervalFormats/intervalFormatItem[@id=\"MMM\"]/greatestDifference[@id=\"M\"]", + "MMM\u2009 – \u2009MMM", + "MMM\u2009–\u2009MMM"), }; for (PathSpaceAdjustData testItem : testItems) {