Skip to content

Commit

Permalink
CLDR-17561 Utility for visiting DtdData elements (#3646)
Browse files Browse the repository at this point in the history
  • Loading branch information
macchiati authored Apr 23, 2024
1 parent 2104fff commit 4dd168a
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 4 deletions.
8 changes: 4 additions & 4 deletions common/dtd/ldmlSupplemental.dtd
Original file line number Diff line number Diff line change
Expand Up @@ -1155,17 +1155,17 @@ CLDR data files are interpreted according to the LDML specification (http://unic

<!ELEMENT matchVariable EMPTY >
<!ATTLIST matchVariable id CDATA #REQUIRED >
<!--@MATCH:regex/\$[a-zA-Z0-9_]+-->
<!--@MATCH:regex/(?!undefined)\$[a-zA-Z0-9_]+-->
<!ATTLIST matchVariable value CDATA #REQUIRED >
<!--@MATCH:any-->
<!--@MATCH:regex/([A-Z]{2}|[0-9]{3})(\+([A-Z]{2}|[0-9]{3}))*-->
<!--@VALUE-->

<!ELEMENT languageMatch EMPTY >
<!--@ORDERED-->
<!ATTLIST languageMatch desired CDATA #REQUIRED >
<!--@MATCH:any/TODO-->
<!--@MATCH:regex/(?!undefined).*-->
<!ATTLIST languageMatch supported CDATA #REQUIRED >
<!--@MATCH:any/TODO-->
<!--@MATCH:any/regex/(?!undefined).*-->
<!ATTLIST languageMatch percent NMTOKEN #IMPLIED >
<!--@MATCH:range/0~100-->
<!--@VALUE-->
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package com.ibm.icu.util;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;

public class MatchElementAttribute {
private Multimap<String, String> matchElementAttribute =
HashMultimap.create(); // "" is a wildcard

public MatchElementAttribute add(String... elementAttributePairs) {
for (int i = 0; i < elementAttributePairs.length; i += 2) {
matchElementAttribute.put(elementAttributePairs[i], elementAttributePairs[i + 1]);
}
return this;
}

public boolean matches(String element, String attribute) {
return matchElementAttribute.containsEntry(element, attribute)
|| matchElementAttribute.containsEntry("", attribute)
|| matchElementAttribute.containsEntry(element, "");
}
}
48 changes: 48 additions & 0 deletions tools/cldr-code/src/main/java/org/unicode/cldr/util/DtdData.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.Stack;
import java.util.TreeMap;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
Expand Down Expand Up @@ -2269,4 +2270,51 @@ public static Element getElement(String xpath, int elementIndex) {
.getElementFromName()
.get(parts.getElement(elementIndex));
}

public static class DtdGuide {
public interface DtdVisitor {
/** Return false if all children should be skipped */
public boolean visit(
DtdType dtdType, Stack<Element> ancestors, Element child, Attribute attribute);
}

private Set<Element> seenElements = new HashSet<>();
private DtdVisitor dtdVisitor;
private DtdType dtdType;
private Stack<Element> ancestors = new Stack<>();
private boolean skipDeprecated;

public DtdGuide(boolean skipDeprecated, DtdVisitor dtdVisitor) {
this.dtdVisitor = dtdVisitor;
this.skipDeprecated = skipDeprecated;
process(DtdType.values());
}

public void process(DtdType... dtdTypes) {
for (DtdType dt : dtdTypes.length != 0 ? dtdTypes : DtdType.values()) {
dtdType = dt;
process(getInstance(dtdType).ROOT);
}
}

private void process(Element element) {
if (seenElements.contains(element) || !skipDeprecated && element.isDeprecated()) {
return;
}
seenElements.add(element);
for (Attribute attribute : element.getAttributes().keySet()) {
if (!skipDeprecated && attribute.isDeprecated()) {
continue;
}
if (!dtdVisitor.visit(dtdType, ancestors, element, attribute)) {
return;
}
}
ancestors.push(element);
for (Element child : element.getChildren().keySet()) {
process(child);
}
ancestors.pop();
}
}
}
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package org.unicode.cldr.unittest;

import com.google.common.collect.ImmutableSet;
import com.ibm.icu.util.MatchElementAttribute;
import java.io.File;
import java.util.ArrayList;
import java.util.Arrays;
Expand All @@ -12,6 +13,7 @@
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.Stack;
import java.util.TreeSet;
import org.unicode.cldr.util.CLDRConfig;
import org.unicode.cldr.util.CLDRFile;
Expand All @@ -24,8 +26,10 @@
import org.unicode.cldr.util.CldrUtility;
import org.unicode.cldr.util.DtdData;
import org.unicode.cldr.util.DtdData.Attribute;
import org.unicode.cldr.util.DtdData.DtdGuide.DtdVisitor;
import org.unicode.cldr.util.DtdData.Element;
import org.unicode.cldr.util.DtdData.ElementType;
import org.unicode.cldr.util.DtdData.ValueStatus;
import org.unicode.cldr.util.DtdType;
import org.unicode.cldr.util.Pair;
import org.unicode.cldr.util.PathHeader;
Expand Down Expand Up @@ -694,4 +698,84 @@ private int removeNonDistinguishing(
}
return counter;
}

public void testForUndefined() {
DtdVisitor visitor =
new DtdVisitor() {
final MatchElementAttribute skipAttributeNames =
new MatchElementAttribute()
.add( // Add, once checking to make sure that these are safe.
// Pairs of element, attribute
"", "references", //
"", "cp", //
"version", "", //
// "ruleset", "type", //
"parseLenient", "", // UnicodeSet
"ruleset", "", // special structure
"casingItem", "", // Special structure
"unitIdComponent", "", // small, relatively fixed set
"unitConstant", "", // only used internally/...
"unitQuantity",
"", // quantity and and baseUnit will be in
// validity/...
"convertUnit", "", // source and baseUnit will be in
// validity/...
"unitPreferences",
"category", // category == quantity will be in
// validity/...
"unitPreferences",
"usage", // usage will be in validity/...
"unitPreference", "", // not ids
"transform", "", // not ids
"numberingSystem", "rules", // rule format can't match
"coverageVariable", "", // no ids
"coverageLevel", "", // no ids
"approvalRequirement", "", // no ids
"pathMatch", "", // no ids
"languageMatch", "", // no ids
"rgPath", "", // no ids
"mapTimezones", "", // ids checked elsewhere
"mapZone", "" // ids checked elsewhere
);
final Set<String> skipElementAndChildren = Set.of("keyboard3", "keyboardTest3");

@Override
public boolean visit(
DtdType dtdType,
Stack<Element> ancestors,
Element element,
Attribute attribute) {
if (skipElementAndChildren.contains(element.getName())) {
return false;
}
final String attributeName = attribute.getName();
if (skipAttributeNames.matches(element.getName(), attributeName)) {
return true;
}
final ValueStatus valueStatus = attribute.getValueStatus("undefined");
attribute.toString();
if (valueStatus == ValueStatus.valid) {
errln(
String.format(
"Can match 'undefined': type=%s\tancestors=%s\telement=%s\tattribute=%s\tmatch=%s",
dtdType,
ancestors,
element,
attributeName,
attribute.getMatchString()));
} else {
logln(
String.format(
"visiting: type=%s\tparent=%s\telement=%s\tancestors=%s\tmatch=%s",
dtdType,
ancestors,
element,
attributeName,
attribute.getMatchString()));
}
return true;
}
};
new DtdData.DtdGuide(true, visitor).process();
}
}

0 comments on commit 4dd168a

Please sign in to comment.