Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

CLDR-16034 Check for pairing bidi markup chars (illegal); remove 2 (unpaired) RLO in ff_Adlm #3606

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions common/main/ff_Adlm.xml
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic
<territory type="BS">𞤄𞤢𞤸𞤢𞤥𞤢𞥄𞤧</territory>
<territory type="BT">𞤄𞤵𞥅𞤼𞤢𞥄𞤲</territory>
<territory type="BV">𞤅𞤵𞤪𞤭𞥅𞤪𞤫 𞤄𞤵𞥅𞤾𞤫𞥅</territory>
<territory type="BW">𞤄𞤮𞤼𞤧𞤵𞤱𞤢𞥄𞤲𞤢</territory>
<territory type="BW">𞤄𞤮𞤼𞤧𞤵𞤱𞤢𞥄𞤲𞤢</territory>
<territory type="BY">𞤄𞤫𞤤𞤢𞤪𞤵𞥅𞤧</territory>
<territory type="BZ">𞤄𞤫𞤤𞤭𞥅𞥁</territory>
<territory type="CA">𞤑𞤢𞤲𞤢𞤣𞤢𞥄</territory>
Expand Down Expand Up @@ -8603,7 +8603,7 @@ CLDR data files are interpreted according to the LDML specification (http://unic
<exemplarCity>𞤐𞤵𞥅𞤳</exemplarCity>
</zone>
<zone type="America/Scoresbysund">
<exemplarCity>𞤋𞤼𞥆𞤮𞤳𞤮𞤪𞤼𞤮𞥅𞤪𞤥𞤭𞥅𞤼</exemplarCity>
<exemplarCity>𞤋𞤼𞥆𞤮𞤳𞤮𞤪𞤼𞤮𞥅𞤪𞤥𞤭𞥅𞤼</exemplarCity>
</zone>
<zone type="America/Danmarkshavn">
<exemplarCity>𞤁𞤢𞥄𞤲𞤥𞤢𞤪𞤳𞥃𞤢𞥄𞤾𞤲</exemplarCity>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -902,7 +902,8 @@ public enum Subtype {
namePlaceholderProblem,
missingSpaceBetweenNameFields,
illegalParameterValue,
illegalAnnotationCode;
illegalAnnotationCode,
illegalCharacter;

@Override
public String toString() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,10 @@
import org.unicode.cldr.util.XPathParts;

public class CheckForExemplars extends FactoryCheckCLDR {
private static final UnicodeSet RTL_CONTROLS =
new UnicodeSet("[\\u061C\\u200E\\u200F\\u202A-\\u202D\\u2066-\\u2069]");
// Single-character (unpaired) bidi marks: U+061C ALM, U+200E LRM, U+200F RLM.
private static final UnicodeSet RTL_CONTROLS = new UnicodeSet("[\\u061C\\u200E\\u200F]");

// Bidi embedding/override/isolate controls: U+202A..U+202E (LRE, RLE, PDF, LRO, RLO)
// and U+2066..U+2069 (LRI, RLI, FSI, PDI). checkIllegalCharacters reports an
// illegalCharacter error for any value that contains one of these.
private static final UnicodeSet ILLEGAL_RTL_CONTROLS =
new UnicodeSet("[\\u202A-\\u202E\\u2066-\\u2069]");

private static final UnicodeSet RTL = new UnicodeSet("[[:bc=AL:][:bc=R:]]");

Expand Down Expand Up @@ -318,6 +320,9 @@ public CheckCLDR handleCheck(
// if (path.indexOf("/calendar") >= 0 && path.indexOf("gregorian") <= 0) return this;
}

// Check all paths for illegal characters, even EXEMPLAR_SKIPS
checkIllegalCharacters(path, value, result);

if (containsPart(path, EXEMPLAR_SKIPS)) {
return this;
}
Expand Down Expand Up @@ -568,6 +573,20 @@ public CheckCLDR handleCheck(
return this;
}

// Flags characters that may never appear in a value, whatever the path.
// At present the only such characters are those in ILLEGAL_RTL_CONTROLS.
// When the value contains at least one of them, exactly one error status with
// subtype illegalCharacter is appended to result; otherwise result is untouched.
// The path argument is accepted but not consulted by this check.
private void checkIllegalCharacters(String path, String value, List<CheckStatus> result) {
    if (!ILLEGAL_RTL_CONTROLS.containsSome(value)) {
        // No forbidden control present: nothing to report.
        return;
    }
    final CheckStatus illegalBidiStatus =
            new CheckStatus()
                    .setCause(this)
                    .setMainType(CheckStatus.errorType)
                    .setSubtype(Subtype.illegalCharacter)
                    .setMessage(
                            "Bidi markup can only include LRM RLM ALM, not paired characters such as FSI PDI");
    result.add(illegalBidiStatus);
}

private String checkAndReplacePlaceholders(
String path, String value, List<CheckStatus> result) {
CheckStatus.Type statusType =
Expand Down
Loading