Skip to content

Commit

Permalink
meow
Browse files Browse the repository at this point in the history
eggrobin committed Dec 13, 2024
1 parent f731c41 commit 35a5a65
Showing 2 changed files with 57 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -480,6 +480,7 @@ value, new ParsePosition(0), IUP.getXSymbolTable()))) {

generateTest(false, path, outFilename, propertyName);
generateCppOldMonkeys(extraPath, outFilename);
generateJavaOldMonkeys(extraPath, outFilename);
}

private void generateCppOldMonkeys(String path, String outFilename) throws IOException {
@@ -512,6 +513,36 @@ private void generateCppOldMonkeys(String path, String outFilename) throws IOExc
fc.close();
}

/**
 * Writes {@code outFilename + ".java"} under {@code path}, containing Java source lines meant
 * to be pasted into ICU4J's RBBITestMonkey.java: one {@code partition.add(...)} line per
 * partition definition of {@code segmenter}, followed by one {@code rules.add(...)} line per
 * segmentation rule.
 *
 * @param path directory passed to {@code UnicodeDataFile.openAndWriteHeader}
 * @param outFilename base name of the output file; its first four characters select the
 *     monkey class named in the generated instructions
 * @throws IOException if the output file cannot be opened, written, or closed
 */
private void generateJavaOldMonkeys(String path, String outFilename) throws IOException {
    final UnicodeDataFile fc = UnicodeDataFile.openAndWriteHeader(path, outFilename + ".java");
    final PrintWriter out = fc.out;
    // The instructions name the monkey class after the first four characters of the test
    // name, with "Char" substituted for the grapheme test. The substitution has to be
    // matched against the four-character prefix: a pattern of "Graph" (five characters)
    // can never occur in it, which left "RBBIGrapMonkey" in the generated instructions.
    final String testPrefix = outFilename.substring(0, 4);
    final String monkeyKind = testPrefix.equals("Grap") ? "Char" : testPrefix;
    out.println();
    out.println("####### Instructions ###################################");
    out.println("# Copy the following lines into RBBITestMonkey.java in #");
    out.println(
            "# ICU4J, in the constructor of RBBIMeowMonkey, replacing #"
                    .replace("Meow", monkeyKind));
    out.println("# the existing block of generated code. #");
    out.println("########################################################");
    out.println();
    out.println(" // --- NOLI ME TANGERE ---");
    out.println(" // Generated by GenerateBreakTest.java in the Unicode tools.");
    for (Segmenter.Builder.NamedRefinedSet part : segmenter.getPartitionDefinition()) {
        out.println(
                " partition.add(new NamedSet(\""
                        + escapeForJavaOldMonkeyLiteral(part.getName())
                        + "\", new UnicodeSet(\""
                        + escapeForJavaOldMonkeyLiteral(part.getDefinition())
                        + "\")));");
    }
    out.println();
    for (Segmenter.SegmentationRule rule : segmenter.getRules()) {
        out.println(" rules.add(" + rule.toJavaOldMonkeyString() + ");");
    }
    out.println(" // --- End of generated code. ---");
    fc.close();
}

/**
 * Escapes {@code text} for inclusion between double quotes in generated Java source.
 * Backslashes are doubled first so that the backslash added in front of each double quote is
 * not itself doubled.
 */
private static String escapeForJavaOldMonkeyLiteral(String text) {
    return text.replace("\\", "\\\\").replace("\"", "\\\"");
}

private void generateTest(
boolean shortVersion, String path, String outFilename, String propertyName)
throws IOException {
26 changes: 26 additions & 0 deletions unicodetools/src/main/java/org/unicode/tools/Segmenter.java
Original file line number Diff line number Diff line change
@@ -283,6 +283,8 @@ public String toString() {
}

/**
 * Returns a string representation of this rule for the C++ old monkeys.
 *
 * <p>NOTE(review): presumably the C++ counterpart of {@link #toJavaOldMonkeyString()}, i.e.,
 * a C++ expression constructing this rule; confirm against the implementations.
 */
public abstract String toCppOldMonkeyString();

/**
 * Returns Java source text for an expression that constructs this rule (a {@code new
 * RemapRule(...)} or {@code new RegexRule(...)} expression in the implementations below),
 * with the rule's strings escaped as Java string literals.
 */
public abstract String toJavaOldMonkeyString();
}

/** A « treat as » rule. */
@@ -390,6 +392,17 @@ public String toCppOldMonkeyString() {
+ replacement
+ ")\")";
}

/**
 * Renders this rule as a Java {@code new RemapRule("name", "pattern", "replacement")}
 * expression, each argument being escaped for use inside a double-quoted literal.
 */
@Override
public String toJavaOldMonkeyString() {
    // Backslashes are doubled before quotes are escaped, so that the backslash inserted in
    // front of a quote is left alone.
    final String quotedName = name.replace("\\", "\\\\").replace("\"", "\\\"");
    final String quotedPattern =
            patternDefinition.replace("\\", "\\\\").replace("\"", "\\\"");
    final String quotedReplacement = replacement.replace("\\", "\\\\").replace("\"", "\\\"");
    final String arguments =
            String.join("\", \"", quotedName, quotedPattern, quotedReplacement);
    return "new RemapRule(\"" + arguments + "\")";
}
}

/** A rule that determines the status of an offset. */
@@ -487,6 +500,19 @@ public String toCppOldMonkeyString() {
+ ")\")";
}

/**
 * Renders this rule as a Java {@code new RegexRule("name", "before", Resolution.X, "after")}
 * expression, where {@code X} is the name of {@code breaks} and each string argument is
 * escaped for use inside a double-quoted literal.
 */
@Override
public String toJavaOldMonkeyString() {
    // Backslashes are doubled before quotes are escaped, so that the backslash inserted in
    // front of a quote is left alone.
    final String quotedName = name.replace("\\", "\\\\").replace("\"", "\\\"");
    final String quotedBefore = beforeDefinition.replace("\\", "\\\\").replace("\"", "\\\"");
    final String resolutionName = breaks.name();
    final String quotedAfter = afterDefinition.replace("\\", "\\\\").replace("\"", "\\\"");
    final StringBuilder expression = new StringBuilder("new RegexRule(\"");
    expression.append(quotedName).append("\", \"");
    expression.append(quotedBefore).append("\", Resolution.");
    expression.append(resolutionName).append(", \"");
    expression.append(quotedAfter).append("\")");
    return expression.toString();
}

// ============== Internals ================
// We cannot use a single regex of the form "(?<= before) after" because
// (RI RI)* RI × RI would require unbounded lookbehind.

0 comments on commit 35a5a65

Please sign in to comment.