Skip to content

Commit

Permalink
CLDR-16034 Check for pairing bidi markup chars (illegal); remove 2 (unpaired) RLO in ff_Adlm (#3606)
Browse files Browse the repository at this point in the history
  • Loading branch information
pedberg-icu authored Apr 3, 2024
1 parent f1401ba commit 9de9528
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 5 deletions.
4 changes: 2 additions & 2 deletions common/main/ff_Adlm.xml
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic
<territory type="BS">𞤄𞤢𞤸𞤢𞤥𞤢𞥄𞤧</territory>
<territory type="BT">𞤄𞤵𞥅𞤼𞤢𞥄𞤲</territory>
<territory type="BV">𞤅𞤵𞤪𞤭𞥅𞤪𞤫 𞤄𞤵𞥅𞤾𞤫𞥅</territory>
<territory type="BW">𞤄𞤮𞤼𞤧𞤵𞤱𞤢𞥄𞤲𞤢</territory>
<territory type="BW">𞤄𞤮𞤼𞤧𞤵𞤱𞤢𞥄𞤲𞤢</territory>
<territory type="BY">𞤄𞤫𞤤𞤢𞤪𞤵𞥅𞤧</territory>
<territory type="BZ">𞤄𞤫𞤤𞤭𞥅𞥁</territory>
<territory type="CA">𞤑𞤢𞤲𞤢𞤣𞤢𞥄</territory>
Expand Down Expand Up @@ -8603,7 +8603,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic
<exemplarCity>𞤐𞤵𞥅𞤳</exemplarCity>
</zone>
<zone type="America/Scoresbysund">
<exemplarCity>𞤋𞤼𞥆𞤮𞤳𞤮𞤪𞤼𞤮𞥅𞤪𞤥𞤭𞥅𞤼</exemplarCity>
<exemplarCity>𞤋𞤼𞥆𞤮𞤳𞤮𞤪𞤼𞤮𞥅𞤪𞤥𞤭𞥅𞤼</exemplarCity>
</zone>
<zone type="America/Danmarkshavn">
<exemplarCity>𞤁𞤢𞥄𞤲𞤥𞤢𞤪𞤳𞥃𞤢𞥄𞤾𞤲</exemplarCity>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -902,7 +902,8 @@ public enum Subtype {
namePlaceholderProblem,
missingSpaceBetweenNameFields,
illegalParameterValue,
illegalAnnotationCode;
illegalAnnotationCode,
illegalCharacter;

@Override
public String toString() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,10 @@
import org.unicode.cldr.util.XPathParts;

public class CheckForExemplars extends FactoryCheckCLDR {
// NOTE(review): RTL_CONTROLS is declared twice below. This looks like the removed and added
// sides of a rendered diff shown together -- confirm which declaration is current.
// Wider variant: U+061C, U+200E, U+200F plus the ranges U+202A-U+202D and U+2066-U+2069.
private static final UnicodeSet RTL_CONTROLS =
new UnicodeSet("[\\u061C\\u200E\\u200F\\u202A-\\u202D\\u2066-\\u2069]");
// Narrower variant: only U+061C, U+200E and U+200F.
private static final UnicodeSet RTL_CONTROLS = new UnicodeSet("[\\u061C\\u200E\\u200F]");

// Characters in U+202A-U+202E and U+2066-U+2069. checkIllegalCharacters reports an error
// for any value that contains at least one of them.
private static final UnicodeSet ILLEGAL_RTL_CONTROLS =
new UnicodeSet("[\\u202A-\\u202E\\u2066-\\u2069]");

// Characters whose bidi class (bc) property is AL or R.
private static final UnicodeSet RTL = new UnicodeSet("[[:bc=AL:][:bc=R:]]");

Expand Down Expand Up @@ -318,6 +320,9 @@ public CheckCLDR handleCheck(
// if (path.indexOf("/calendar") >= 0 && path.indexOf("gregorian") <= 0) return this;
}

// Check all paths for illegal characters, even EXEMPLAR_SKIPS
checkIllegalCharacters(path, value, result);

if (containsPart(path, EXEMPLAR_SKIPS)) {
return this;
}
Expand Down Expand Up @@ -568,6 +573,20 @@ public CheckCLDR handleCheck(
return this;
}

/**
 * Reports characters that are never allowed in a value.
 *
 * <p>At present the only such characters are the ones in ILLEGAL_RTL_CONTROLS (the paired
 * bidi marks). When the value contains at least one of them, a single error-type status
 * with subtype {@code illegalCharacter} is appended to {@code result}; otherwise
 * {@code result} is left untouched.
 *
 * @param path the path being checked; not consulted by this check
 * @param value the value to scan for illegal characters
 * @param result the list that receives the error status, if any
 */
private void checkIllegalCharacters(String path, String value, List<CheckStatus> result) {
    // Guard clause: nothing to report when no illegal control is present.
    if (!ILLEGAL_RTL_CONTROLS.containsSome(value)) {
        return;
    }
    final CheckStatus illegalCharStatus =
            new CheckStatus()
                    .setCause(this)
                    .setMainType(CheckStatus.errorType)
                    .setSubtype(Subtype.illegalCharacter)
                    .setMessage(
                            "Bidi markup can only include LRM RLM ALM, not paired characters such as FSI PDI");
    result.add(illegalCharStatus);
}

private String checkAndReplacePlaceholders(
String path, String value, List<CheckStatus> result) {
CheckStatus.Type statusType =
Expand Down

0 comments on commit 9de9528

Please sign in to comment.