Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make MakeUnicodeFiles regenerate IndicMeowCategory.txt #547

Merged
merged 9 commits into from
Oct 4, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions unicodetools/data/ucd/dev/IndicPositionalCategory.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# IndicPositionalCategory-15.1.0.txt
# Date: 2023-01-05
# IndicPositionalCategory-16.0.0.txt
# Date: 2023-10-02, 22:58:33 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# For documentation, see UAX #44: Unicode Character Database,
# at https://www.unicode.org/reports/tr44/
# Unicode Character Database
# For documentation, see https://www.unicode.org/reports/tr44/
markusicu marked this conversation as resolved.
Show resolved Hide resolved
#
# This file defines the following property:
#
Expand Down
10 changes: 5 additions & 5 deletions unicodetools/data/ucd/dev/IndicSyllabicCategory.txt
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
# IndicSyllabicCategory-15.1.0.txt
# Date: 2023-01-05
# IndicSyllabicCategory-16.0.0.txt
# Date: 2023-10-02, 22:58:33 GMT
# © 2023 Unicode®, Inc.
# Unicode and the Unicode Logo are registered trademarks of Unicode, Inc. in the U.S. and other countries.
# For terms of use, see https://www.unicode.org/terms_of_use.html
#
# For documentation, see UAX #44: Unicode Character Database,
# at https://www.unicode.org/reports/tr44/
# Unicode Character Database
# For documentation, see https://www.unicode.org/reports/tr44/
#
# This file defines the following property:
#
Expand Down Expand Up @@ -1335,7 +1335,7 @@ ABF0..ABF9 ; Number # Nd [10] MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NI
# script, e.g. in Brahmi)
#
# Note: These are different from Numbers, in the way that there is no known
# evidence of Brahmi Joining Numbers taking vowels or subjoined consonants.
# evidence of Brahmi Joining Numbers taking vowels or subjoined consonants.
# Until such evidence is found, implementations may assume that Brahmi
# Joining Numbers only participate in shaping with other Brahmi Joining
# Numbers.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ static class Format {
Map<String, List<String>> fileToPropertySet = new TreeMap<String, List<String>>();
Map<String, String> fileToComments = new TreeMap<String, String>();
Map<String, String> fileToDirectory = new TreeMap<String, String>();
Map<String, List<String>> propertyToOrderedValues = new TreeMap<String, List<String>>();
Map<String, Map<String, String>> propertyToValueToComments =
new TreeMap<String, Map<String, String>>();
Map<String, String> hackMap = new HashMap<String, String>();
Expand Down Expand Up @@ -110,6 +111,12 @@ public static class PrintStyle {
// Unicode 15.1 and later LineBreak.txt and EastAsianWidth.txt, which are all generated
// in that format by some other tool.
boolean kenFile = false;
// Whether the file should be produced in the style of IndicPositionalCategory.txt and
// IndicSyllabicCategory.txt, which are both generated in that format by some other
// tool.
boolean roozbehFile = false;
// Whether to separate values of enumerated properties using a line of equal signs.
boolean separateValues = true;
boolean hackValues = false;
boolean mergeRanges = true;
String nameStyle = "none";
Expand Down Expand Up @@ -138,6 +145,10 @@ String parse(String options) {
interleaveValues = true;
} else if (piece.equals("kenFile")) {
kenFile = true;
} else if (piece.equals("roozbehFile")) {
roozbehFile = true;
} else if (piece.startsWith("separateValues=")) {
separateValues = afterEqualsBoolean(piece);
} else if (piece.equals("hackValues")) {
hackValues = true;
} else if (piece.equals("sortNumeric")) {
Expand Down Expand Up @@ -301,6 +312,10 @@ private void build() {
}
line = line.trim();
if (line.length() == 0) {
if (comments.length() != 0) {
// Preserve blank lines between comments.
comments += "\n";
}
continue;
}
if (DEBUG) {
Expand All @@ -321,6 +336,7 @@ private void build() {
comments += line;
} else {
// end of comments, roll up
comments = comments.trim();
if (comments.length() != 0) {
if (property != null) {
addValueComments(property, value, comments);
Expand Down Expand Up @@ -350,6 +366,10 @@ private void build() {
value = "";
} else if (line.startsWith("Value:")) {
value = lineValue;
final var values =
propertyToOrderedValues.computeIfAbsent(
property, k -> new ArrayList<String>());
values.add(value);
} else if (line.startsWith("HackName:")) {
final String regularItem = Utility.getUnskeleton(lineValue, true);
hackMap.put(regularItem, lineValue);
Expand Down Expand Up @@ -1152,6 +1172,9 @@ public static void generatePropertyFile(String filename) throws IOException {
filename, Format.theFormat.getPrintStyle(name));
if (!ps.kenFile) {
pwProp.println();
if (!ps.separateValues) {
pwProp.println();
}
pwProp.println(SEPARATOR);
}
final String propComment = Format.theFormat.getValueComments(name, "");
Expand All @@ -1161,7 +1184,11 @@ public static void generatePropertyFile(String filename) throws IOException {
pwProp.println(propComment);
} else if (!prop.isType(UnicodeProperty.BINARY_MASK)) {
pwProp.println();
pwProp.println("# Property:\t" + name);
if (ps.roozbehFile) {
pwProp.println("# Property: " + name);
} else {
pwProp.println("# Property:\t" + name);
}
}
}

Expand All @@ -1182,9 +1209,12 @@ public static void generatePropertyFile(String filename) throws IOException {
v = v + " (" + v2 + ")";
}
}
pwProp.println();
pwProp.println(ps.roozbehFile ? "#" : "");
pwProp.println("# All code points not explicitly listed for " + prop.getName());
pwProp.println("# have the value " + v + ".");
pwProp.println(
"# have the value "
+ v
+ (ps.roozbehFile && v.equals("NA") ? " (not applicable)." : "."));
}

if (!ps.interleaveValues && prop.isType(UnicodeProperty.BINARY_MASK)) {
Expand Down Expand Up @@ -1254,6 +1284,21 @@ private static void writeEnumeratedValues(
temp2.addAll(aliases);
aliases = temp2;
}
if (ps.roozbehFile) {
aliases.removeIf(alias -> UnicodeProperty.compareNames(alias, ps.skipValue) == 0);
if (!Format.theFormat
.propertyToOrderedValues
.get(prop.getName())
.containsAll(aliases)) {
final TreeSet<String> missingAliases = new TreeSet<String>(aliases);
missingAliases.removeAll(
Format.theFormat.propertyToOrderedValues.get(prop.getName()));
throw new IllegalArgumentException(
"All values must be listed when using roozbehFile; missing "
+ missingAliases);
}
aliases = Format.theFormat.propertyToOrderedValues.get(prop.getName());
}
if (ps.sortNumeric) {
if (DEBUG) {
System.out.println("Reordering");
Expand Down Expand Up @@ -1284,7 +1329,7 @@ private static void writeEnumeratedValues(

final String missing = ps.skipUnassigned != null ? ps.skipUnassigned : ps.skipValue;
if (missing != null && !missing.equals(UCD_Names.NO)) {
pw.println();
pw.println(ps.roozbehFile ? "#" : "");
final String propName = bf.getPropName();
// if (propName == null) propName = "";
// else if (propName.length() != 0) propName = propName + "; ";
Expand All @@ -1302,6 +1347,10 @@ private static void writeEnumeratedValues(
writeEnumeratedMissingValues(pw, overallDefault, defaultLbValues);
}
}
if (!ps.separateValues) {
pw.println();
pw.println(SEPARATOR.replace('=', '-'));
}
for (final Iterator<String> it = aliases.iterator(); it.hasNext(); ) {
final String value = it.next();
if (DEBUG) {
Expand Down Expand Up @@ -1416,9 +1465,13 @@ private static void writeEnumeratedValues(

if (!prop.isType(UnicodeProperty.BINARY_MASK)) {
pw.println();
pw.println(SEPARATOR);
if (ps.separateValues) {
pw.println(SEPARATOR);
}
if (nonLongValue) {
pw.println();
if (ps.separateValues) {
pw.println();
}
pw.println("# " + prop.getName() + "=" + value);
}
}
Expand All @@ -1442,6 +1495,11 @@ private static void writeEnumeratedValues(
pw.println();
// if (s.size() != 0)
bf.setMergeRanges(ps.mergeRanges);
bf.setShowTotal(!ps.roozbehFile);
if (ps.roozbehFile) {
bf.setRangeBreakSource(
ToolUnicodePropertySource.make(Default.ucdVersion()).getProperty("Block"));
}
bf.showSetNames(pw, s);
if (DEBUG) {
System.out.println(bf.showSetNames(s));
Expand Down
Loading