Skip to content

Commit

Permalink
Some very crappy code
Browse files Browse the repository at this point in the history
  • Loading branch information
eggrobin committed Dec 29, 2023
1 parent 3880c4f commit b3b53c0
Showing 1 changed file with 82 additions and 1 deletion.
Original file line number Diff line number Diff line change
Expand Up @@ -949,11 +949,23 @@ public static void writeNormalizerTestSuite(String directory, String fileName)
log.println("#");

// Maps every code point (as a string) whose NFD form differs from itself to that NFD form.
final Map<String, String> decompositions = new TreeMap<>();
// Characters whose NFD form has more than one code point, indexed by the first and by the
// last code point of that NFD form respectively.
final Map<Integer, Set<String>> decomposablesByFirstCodePoint = new TreeMap<>();
final Map<Integer, Set<String>> decomposablesByLastCodePoint = new TreeMap<>();
for (int cp = 0; cp <= 0x10FFFF; ++cp) {
    String c = Character.toString(cp);
    String decomposition = Default.nfd().normalize(cp);
    if (!decomposition.equals(c)) {
        decompositions.put(c, decomposition);
        // Single-code-point decompositions are recorded above but are not indexed by
        // first/last code point.
        if (decomposition.codePointCount(0, decomposition.length()) > 1) {
            int first = decomposition.codePointAt(0);
            int last = decomposition.codePointBefore(decomposition.length());
            decomposablesByFirstCodePoint
                    .computeIfAbsent(first, key -> new TreeSet<>())
                    .add(c);
            decomposablesByLastCodePoint
                    .computeIfAbsent(last, key -> new TreeSet<>())
                    .add(c);
        }
    }
}
for (String decomposition : decompositions.values()) {
Expand All @@ -969,7 +981,9 @@ public static void writeNormalizerTestSuite(String directory, String fileName)
candidateCharacters.add(candidateEntry.getKey());
}
}
for (int length = 2; length < decomposition.length(); ++length) {
for (int length = 2;
length < decomposition.codePointCount(0, decomposition.length());
++length) {
forAllStrings(
candidateCharacters,
"",
Expand All @@ -983,6 +997,73 @@ public static void writeNormalizerTestSuite(String directory, String fileName)
}
}

// Looks for test strings that straddle a pair of decomposable characters: for every recorded
// decomposition, its first two code points are treated as a "link" between a character whose
// decomposition ends with the first code point and a character whose decomposition begins with
// the second one. Progress is reported on System.out; matching strings are written to log.
for (String linkDecomposition : decompositions.values()) {
int first = linkDecomposition.codePointAt(0);
// Skip decompositions that consist of a single code point: there is no second code point.
if (linkDecomposition.length() == UTF16.getCharCount(first)) {
continue;
}
int second = linkDecomposition.codePointAt(UTF16.getCharCount(first));
// Only proceed when some decomposable ends in `first` and some decomposable starts with `second`.
if (decomposablesByLastCodePoint.containsKey(first)
&& decomposablesByFirstCodePoint.containsKey(second)) {
System.out.println(
Default.ucd().getName(first) + "+" + Default.ucd().getName(second) + "?");
for (String firstCandidate : decomposablesByLastCodePoint.get(first)) {
for (String secondCandidate : decomposablesByFirstCodePoint.get(second)) {
String firstDecomposition = Default.nfd().normalize(firstCandidate);
String secondDecomposition = Default.nfd().normalize(secondCandidate);
// The target: the two NFD forms concatenated as-is.
String decomposition = firstDecomposition + secondDecomposition;
System.out.println(
Default.ucd().getName(firstCandidate)
+ "+"
+ Default.ucd().getName(secondCandidate));
// Candidate building blocks: every code point of the target itself...
final Set<String> candidateCharacters = new TreeSet<>();
decomposition
.codePoints()
.forEach(cp -> candidateCharacters.add(Character.toString(cp)));
// ...plus every decomposable character whose decomposition uses only code points that
// occur in the target.
for (Map.Entry<String, String> candidateEntry : decompositions.entrySet()) {
if (candidateEntry
.getValue()
.codePoints()
.allMatch(
cp -> decomposition.contains(Character.toString(cp)))) {
candidateCharacters.add(candidateEntry.getKey());
}
}
// Lengths run from 2 up to one less than the number of code points in the target.
// NOTE(review): forAllStrings is not visible here; presumably it invokes the callback on
// every string of `length` elements drawn from candidateCharacters - confirm.
for (int length = 2;
length < decomposition.codePointCount(0, decomposition.length());
++length) {
forAllStrings(
candidateCharacters,
"",
length,
s -> {
// Keep only strings distinct from the target whose NFD form equals the target.
if (!s.equals(decomposition)
&& Default.nfd()
.normalize(s)
.equals(decomposition)) {
// Discard s if it can be cut into a prefix normalizing to firstDecomposition and a
// suffix normalizing to secondDecomposition. j is a UTF-16 code unit index, so
// every char offset below s.length() is tried as a cut position.
for (int j = 0; j < s.length(); ++j) {
if (Default.nfd()
.normalize(s.substring(0, j))
.equals(firstDecomposition)
&& Default.nfd()
.normalize(s.substring(j))
.equals(secondDecomposition)) {
return;
}
}
// No such cut exists: emit s to the log and print its name.
writeLine(s, log, true);
System.out.println(Default.ucd().getName(s));
}
});
System.out.println("Done " + length + "-character strings");
}
System.out.println("Done this pair");
}
}
System.out.println("Done this link");
}
}

Utility.fixDot();

log.println("#");
Expand Down

0 comments on commit b3b53c0

Please sign in to comment.