Skip to content
This repository has been archived by the owner on Aug 5, 2024. It is now read-only.

Commit

Permalink
Refactor Java version to use cleanupSplitSurrogates
Browse files Browse the repository at this point in the history
  • Loading branch information
dmsnell committed May 10, 2023
1 parent 191b698 commit d681ef6
Showing 1 changed file with 41 additions and 24 deletions.
65 changes: 41 additions & 24 deletions java/src/name/fraser/neil/plaintext/diff_match_patch.java
Original file line number Diff line number Diff line change
Expand Up @@ -1294,6 +1294,46 @@ public void diff_cleanupMerge(LinkedList<Diff> diffs) {
}
}

/**
 * Rearrange diff boundaries that split Unicode surrogate pairs.
 *
 * A trailing high surrogate is stripped from the diff that ends with it and
 * is prepended to each following diff that begins with a low surrogate, until
 * a later diff ending in a high surrogate replaces it.  Diffs that are empty,
 * or that become empty after stripping, are removed from the list.  Both the
 * list and the text of its elements are modified in place.
 *
 * @param diffs List of Diff objects; its iterator must support remove().
 */
public void diff_cleanupSplitSurrogates(List<Diff> diffs) {
  // Most recent high surrogate stripped from the end of a diff.
  // The initial value 0 is not a high surrogate, so the first diff can never
  // receive a prefix and no separate "is first" flag is needed.
  char pendingHigh = 0;

  // An explicit iterator lets empty diffs be removed by position.  Removing
  // via List.remove(Object) or a HashSet would go through equals()/hashCode()
  // and could drop (or fail to drop) a different but equal element.
  for (java.util.Iterator<Diff> it = diffs.iterator(); it.hasNext();) {
    Diff aDiff = it.next();
    if (aDiff.text.isEmpty()) {
      it.remove();
      continue;
    }

    char thisTop = aDiff.text.charAt(0);
    char thisEnd = aDiff.text.charAt(aDiff.text.length() - 1);

    // Decide on the prefix BEFORE pendingHigh can be overwritten below;
    // otherwise a diff that starts with a low surrogate and ends with a high
    // surrogate would have its own trailing surrogate moved to its front.
    boolean needsPrefix = Character.isHighSurrogate(pendingHigh)
        && Character.isLowSurrogate(thisTop);
    char prefix = pendingHigh;

    if (Character.isHighSurrogate(thisEnd)) {
      // Carry the dangling high surrogate forward to the following diffs.
      pendingHigh = thisEnd;
      aDiff.text = aDiff.text.substring(0, aDiff.text.length() - 1);
    }

    if (needsPrefix) {
      aDiff.text = prefix + aDiff.text;
    }

    // A diff consisting solely of a high surrogate is now empty.
    if (aDiff.text.isEmpty()) {
      it.remove();
    }
  }
}

/**
* loc is a location in text1, compute and return the equivalent location in
* text2.
Expand Down Expand Up @@ -1430,31 +1470,8 @@ public int diff_levenshtein(List<Diff> diffs) {
*/
public String diff_toDelta(List<Diff> diffs) {
StringBuilder text = new StringBuilder();
char lastEnd = 0;
boolean isFirst = true;
this.diff_cleanupSplitSurrogates(diffs);
for (Diff aDiff : diffs) {
if (aDiff.text.isEmpty()) {
continue;
}

char thisTop = aDiff.text.charAt(0);
char thisEnd = aDiff.text.charAt(aDiff.text.length() - 1);

if (Character.isHighSurrogate(thisEnd)) {
lastEnd = thisEnd;
aDiff.text = aDiff.text.substring(0, aDiff.text.length() - 1);
}

if (! isFirst && Character.isHighSurrogate(lastEnd) && Character.isLowSurrogate(thisTop)) {
aDiff.text = lastEnd + aDiff.text;
}

isFirst = false;

if ( aDiff.text.isEmpty() ) {
continue;
}

switch (aDiff.operation) {
case INSERT:
try {
Expand Down

0 comments on commit d681ef6

Please sign in to comment.