Skip to content

Commit

Permalink
CLDR-17620 json: add transforms
Browse files Browse the repository at this point in the history
- new package cldr-transforms
- add manifest file transforms.json at the top level
- each transform has a metadata file (transforms/ID.json) and a raw text file (transforms/ID.txt).
- metadata has all of the keys from the transform rule
- the _rulesFile key formally indicates the raw text file's name (in case we need to massage the ID for some reason in the future).
  • Loading branch information
srl295 committed Sep 10, 2024
1 parent eb4b003 commit 7bdacce
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 8 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,15 @@ public static CldrNode createNode(
String fullTrunk = extractAttrs(fullPathSegment, node.nondistinguishingAttributes);
if (!node.name.equals(fullTrunk)) {
throw new ParseException(
"Error in parsing \"" + pathSegment + " \":\"" + fullPathSegment, 0);
"Error in parsing \""
+ pathSegment
+ "\":\""
+ fullPathSegment
+ " - "
+ node.name
+ " != "
+ fullTrunk,
0);
}

for (String key : node.distinguishingAttributes.keySet()) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@
import org.unicode.cldr.util.CLDRLocale;
import org.unicode.cldr.util.CLDRPaths;
import org.unicode.cldr.util.CLDRTool;
import org.unicode.cldr.util.CLDRTransforms;
import org.unicode.cldr.util.CLDRURLS;
import org.unicode.cldr.util.CalculatedCoverageLevels;
import org.unicode.cldr.util.CldrUtility;
Expand Down Expand Up @@ -88,6 +89,7 @@ public class Ldml2JsonConverter {
private static final String CLDR_PKG_PREFIX = "cldr-";
private static final String FULL_TIER_SUFFIX = "-full";
private static final String MODERN_TIER_SUFFIX = "-modern";
private static final String TRANSFORM_RAW_SUFFIX = ".txt";
private static Logger logger = Logger.getLogger(Ldml2JsonConverter.class.getName());

enum RunType {
Expand All @@ -98,7 +100,8 @@ enum RunType {
rbnf(false, true),
annotations,
annotationsDerived,
bcp47(false, false);
bcp47(false, false),
transforms(false, false);

private final boolean isTiered;
private final boolean hasLocales;
Expand Down Expand Up @@ -739,6 +742,8 @@ private int convertCldrItems(
outFilename = filenameAsLangTag + ".json";
} else if (type == RunType.bcp47) {
outFilename = filename + ".json";
} else if (type == RunType.transforms) {
outFilename = filename + ".json";
} else if (js.section.equals("other")) {
// If you see other-___.json, it means items that were missing from
// JSON_config_*.txt
Expand Down Expand Up @@ -775,11 +780,11 @@ private int convertCldrItems(
if (type == RunType.main) {
avl.full.add(filenameAsLangTag);
}
} else if (type == RunType.rbnf) {
js.packageName = "rbnf";
tier = "";
} else if (type == RunType.bcp47) {
js.packageName = "bcp47";
} else if (type == RunType.rbnf
|| type == RunType.bcp47
|| type == RunType.transforms) {
// untiered, just use the name
js.packageName = type.name();
tier = "";
}
if (js.packageName != null) {
Expand Down Expand Up @@ -884,6 +889,28 @@ private int convertCldrItems(
}
}

if (item.getUntransformedPath()
.startsWith("//supplementalData/transforms")) {
// here, write the raw data
final String rawTransformFile = filename + TRANSFORM_RAW_SUFFIX;
try (PrintWriter outf =
FileUtilities.openUTF8Writer(outputDir, rawTransformFile)) {
outf.println(item.getValue());
// note: not logging the write here- it will be logged when the
// .json file is written.
}
// the value is now the raw filename
item.setValue(rawTransformFile);
item.setPath(
item.getPath()
.replaceAll("\\]/tRule.*$", "]/_rulesFile")
.replace("/transforms/", "/"));
item.setFullPath(
item.getFullPath()

Check failure

Code scanning / CodeQL

Polynomial regular expression used on uncontrolled data High

This
regular expression
that depends on a
user-provided value
may run slow on strings starting with ']/tRule' and with many repetitions of ']/tRule'.
This
regular expression
that depends on a
user-provided value
may run slow on strings starting with ']/tRule' and with many repetitions of ']/tRule'.
This
regular expression
that depends on a
user-provided value
may run slow on strings starting with ']/tRule' and with many repetitions of ']/tRule'.
This
regular expression
that depends on a
user-provided value
may run slow on strings starting with ']/tRule' and with many repetitions of ']/tRule'.
.replaceAll("\\]/tRule.*$", "]/_rulesFile")
.replace("/transforms/", "/"));
}

// some items need to be split to multiple item before processing. None
// of those items need to be sorted.
// Applies to SPLITTABLE_ATTRS attributes.
Expand Down Expand Up @@ -1453,6 +1480,24 @@ public void writeDefaultContent(String outputDir) throws IOException {
outf.close();
}

/**
 * Writes the transforms index file into the transforms package directory.
 *
 * <p>The file is named after {@link RunType#transforms} with a {@code .json} suffix and is
 * placed in {@code outputDir + "/cldr-" + RunType.transforms.name()}. Its content is a single
 * JSON object whose only key is the run type name and whose value is the serialized result of
 * {@code CLDRTransforms.getInstance().getJsonIndex()}.
 *
 * @param outputDir the base output directory that contains the per-package directories
 * @throws IOException declared for failures while opening the output file
 */
public void writeTransformMetadata(String outputDir) throws IOException {
    final String dirName = outputDir + "/cldr-" + RunType.transforms.name();
    final String fileName = RunType.transforms.name() + ".json";
    // try-with-resources so the writer is closed even if building or serializing the index
    // throws; this matches how writeCoverageLevels handles its writer.
    try (PrintWriter outf = FileUtilities.openUTF8Writer(dirName, fileName)) {
        System.out.println(
                PACKAGE_ICON
                        + " Creating packaging file => "
                        + dirName
                        + File.separator
                        + fileName);
        JsonObject obj = new JsonObject();
        obj.add(
                RunType.transforms.name(),
                gson.toJsonTree(CLDRTransforms.getInstance().getJsonIndex()));
        outf.println(gson.toJson(obj));
    }
}

public void writeCoverageLevels(String outputDir) throws IOException {
try (PrintWriter outf =
FileUtilities.openUTF8Writer(outputDir + "/cldr-core", "coverageLevels.json"); ) {
Expand Down Expand Up @@ -2225,6 +2270,8 @@ public void processDirectory(String dirName, DraftStatus minimalDraftStatus)
if (Boolean.parseBoolean(options.get("packagelist").getValue())) {
writePackageList(outputDir);
}
} else if (type == RunType.transforms) {
writeTransformMetadata(outputDir);
}
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,14 @@ class LdmlConvertRules {
"identity:variant:type",

// in common/bcp47/*.xml
"keyword:key:name");
"keyword:key:name",

// transforms
"transforms:transform:source",
"transforms:transform:target",
"transforms:transform:direction");

/**
* The set of element:attribute pair in which the attribute should be treated as value. All the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1128,4 +1128,19 @@ static String parseDoubleColon(String x, Set<String> others) {
}
return "";
}

public class CLDRTransformsJsonIndex {
/** raw list of available IDs */
public String[] available =
getAvailableIds().stream()
.map((String id) -> id.replace(".xml", ""))
.collect(Collectors.toList())
.toArray(new String[0]);
}

/**
 * Builds the metadata (index file) exposed as cldr-json/cldr-transforms/transforms.json.
 *
 * @return a newly constructed index object; a fresh instance is created on every call
 */
public CLDRTransformsJsonIndex getJsonIndex() {
    return new CLDRTransformsJsonIndex();
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
section=transforms ; path=//cldr/supplemental/transforms/.* ; package=transforms ; packageDesc=Transform data
dependency=core ; package=transforms

0 comments on commit 7bdacce

Please sign in to comment.