From d76675adc342003bd5219b36c700f74a2d6428dd Mon Sep 17 00:00:00 2001 From: stonewell Date: Thu, 25 Apr 2019 18:08:36 -0700 Subject: [PATCH 1/7] move to stl, compile passed --- cpp/.gitignore | 3 + cpp/Makefile | 32 + cpp/diff_match_patch.cpp | 3648 +++++++++++++++++---------------- cpp/diff_match_patch.h | 1125 +++++----- cpp/diff_match_patch_test.cpp | 1943 +++++++++--------- cpp/diff_match_patch_test.h | 116 +- cpp/diff_match_patch_util.cpp | 184 ++ cpp/diff_match_patch_util.h | 40 + 8 files changed, 3695 insertions(+), 3396 deletions(-) create mode 100644 cpp/.gitignore create mode 100644 cpp/Makefile create mode 100644 cpp/diff_match_patch_util.cpp create mode 100644 cpp/diff_match_patch_util.h diff --git a/cpp/.gitignore b/cpp/.gitignore new file mode 100644 index 0000000..1be7842 --- /dev/null +++ b/cpp/.gitignore @@ -0,0 +1,3 @@ +libdiff_match_patch.a +objs/ +test_diff_match_patch diff --git a/cpp/Makefile b/cpp/Makefile new file mode 100644 index 0000000..7f317e2 --- /dev/null +++ b/cpp/Makefile @@ -0,0 +1,32 @@ +SOURCES := diff_match_patch.cpp diff_match_patch_util.cpp + +OBJDIR := objs + +OBJECTS := $(addprefix $(OBJDIR)/, $(notdir $(patsubst %.cpp,%.o,$(SOURCES)))) +OUT_LIB := libdiff_match_patch.a + +CXXFLAGS := -std=c++17 \ + -g -O0 + +TEST_SRC := diff_match_patch_test.cpp +TEST_OBJECTS := $(addprefix $(OBJDIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SRC)))) +TEST_EXEC := test_diff_match_patch + +.PHONY: all clean init + +all: init $(OUT_LIB) $(TEST_EXEC) + +$(OBJDIR)/%.o : %.cpp %.h + $(CXX) $(CFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -o "$@" "$<" + +$(OUT_LIB) : $(OBJECTS) + $(AR) -rv "$@" $(OBJECTS) + +$(TEST_EXEC) : $(TEST_OBJECTS) $(OUT_LIB) + $(CXX) $(LDFLAGS) -o "$@" "$<" $(OUT_LIB) + +init: + mkdir -p $(OBJDIR) + +clean: + $(RM) -r $(OUT_LIB) $(OBJDIR) $(TEST_EXEC) diff --git a/cpp/diff_match_patch.cpp b/cpp/diff_match_patch.cpp index 64f270c..141f4c8 100644 --- a/cpp/diff_match_patch.cpp +++ b/cpp/diff_match_patch.cpp @@ -18,12 +18,19 @@ #include 
#include -// Code known to compile and run with Qt 4.3 through Qt 4.7. -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + #include #include "diff_match_patch.h" - ////////////////////////// // // Diff Class @@ -36,37 +43,41 @@ * @param operation One of INSERT, DELETE or EQUAL * @param text The text being applied */ -Diff::Diff(Operation _operation, const QString &_text) : - operation(_operation), text(_text) { - // Construct a diff with the specified operation and text. +Diff::Diff(Operation _operation, const std::wstring &_text) : + operation(_operation), text(_text), invalid{true} { + // Construct a diff with the specified operation and text. } +Diff::Diff(Operation _operation, const wchar_t * _text): + operation(_operation), text{_text == NULL ? L"": _text}, invalid{_text == NULL} { + // Construct a diff with the specified operation and text. + } Diff::Diff() { } -QString Diff::strOperation(Operation op) { - switch (op) { +std::wstring Diff::strOperation(Operation op) { + switch (op) { case INSERT: - return "INSERT"; + return L"INSERT"; case DELETE: - return "DELETE"; + return L"DELETE"; case EQUAL: - return "EQUAL"; - } - throw "Invalid operation."; + return L"EQUAL"; + } + throw L"Invalid operation."; } /** * Display a human-readable version of this Diff. * @return text version */ -QString Diff::toString() const { - QString prettyText = text; - // Replace linebreaks with Pilcrow signs. - prettyText.replace('\n', L'\u00b6'); - return QString("Diff(") + strOperation(operation) + QString(",\"") - + prettyText + QString("\")"); +std::wstring Diff::toString() const { + std::wstring prettyText = text; + // Replace linebreaks with Pilcrow signs. 
+ std::replace_all(prettyText, L"\n", L"\u00b6"); + return std::wstring(L"Diff(") + strOperation(operation) + std::wstring(L",\"") + + prettyText + std::wstring(L"\")"); } /** @@ -75,11 +86,11 @@ QString Diff::toString() const { * @return true or false */ bool Diff::operator==(const Diff &d) const { - return (d.operation == this->operation) && (d.text == this->text); + return (d.operation == this->operation) && (d.text == this->text); } bool Diff::operator!=(const Diff &d) const { - return !(operator == (d)); + return !(operator == (d)); } @@ -94,16 +105,16 @@ bool Diff::operator!=(const Diff &d) const { * Constructor. Initializes with an empty list of diffs. */ Patch::Patch() : - start1(0), start2(0), - length1(0), length2(0) { + start1(0), start2(0), + length1(0), length2(0) { } bool Patch::isNull() const { - if (start1 == 0 && start2 == 0 && length1 == 0 && length2 == 0 - && diffs.size() == 0) { - return true; - } - return false; + if (start1 == 0 && start2 == 0 && length1 == 0 && length2 == 0 + && diffs.size() == 0) { + return true; + } + return false; } @@ -113,45 +124,45 @@ bool Patch::isNull() const { * Indices are printed as 1-based, not 0-based. * @return The GNU diff string */ -QString Patch::toString() { - QString coords1, coords2; - if (length1 == 0) { - coords1 = QString::number(start1) + QString(",0"); - } else if (length1 == 1) { - coords1 = QString::number(start1 + 1); - } else { - coords1 = QString::number(start1 + 1) + QString(",") - + QString::number(length1); - } - if (length2 == 0) { - coords2 = QString::number(start2) + QString(",0"); - } else if (length2 == 1) { - coords2 = QString::number(start2 + 1); - } else { - coords2 = QString::number(start2 + 1) + QString(",") - + QString::number(length2); - } - QString text; - text = QString("@@ -") + coords1 + QString(" +") + coords2 - + QString(" @@\n"); - // Escape the body of the patch with %xx notation. 
- foreach (Diff aDiff, diffs) { - switch (aDiff.operation) { - case INSERT: - text += QString('+'); - break; - case DELETE: - text += QString('-'); - break; - case EQUAL: - text += QString(' '); - break; - } - text += QString(QUrl::toPercentEncoding(aDiff.text, " !~*'();/?:@&=+$,#")) - + QString("\n"); - } - - return text; +std::wstring Patch::toString() { + std::wstring coords1, coords2; + if (length1 == 0) { + coords1 = std::to_wstring(start1) + std::wstring(L",0"); + } else if (length1 == 1) { + coords1 = std::to_wstring(start1 + 1); + } else { + coords1 = std::to_wstring(start1 + 1) + std::wstring(L",") + + std::to_wstring(length1); + } + if (length2 == 0) { + coords2 = std::to_wstring(start2) + std::wstring(L",0"); + } else if (length2 == 1) { + coords2 = std::to_wstring(start2 + 1); + } else { + coords2 = std::to_wstring(start2 + 1) + std::wstring(L",") + + std::to_wstring(length2); + } + std::wstring text; + text = std::wstring(L"@@ -") + coords1 + std::wstring(L" +") + coords2 + + std::wstring(L" @@\n"); + // Escape the body of the patch with %xx notation. 
+ for(Diff aDiff : diffs) { + switch (aDiff.operation) { + case INSERT: + text += std::wstring(L"+"); + break; + case DELETE: + text += std::wstring(L"-"); + break; + case EQUAL: + text += std::wstring(L" "); + break; + } + text += std::url_encode(aDiff.text, " !~*'();/?:@&=+$,#") + + std::wstring(L"\n"); + } + + return text; } @@ -162,1944 +173,1977 @@ QString Patch::toString() { ///////////////////////////////////////////// diff_match_patch::diff_match_patch() : - Diff_Timeout(1.0f), - Diff_EditCost(4), - Match_Threshold(0.5f), - Match_Distance(1000), - Patch_DeleteThreshold(0.5f), - Patch_Margin(4), - Match_MaxBits(32) { + Diff_Timeout(1.0f), + Diff_EditCost(4), + Match_Threshold(0.5f), + Match_Distance(1000), + Patch_DeleteThreshold(0.5f), + Patch_Margin(4), + Match_MaxBits(32) { } -QList diff_match_patch::diff_main(const QString &text1, - const QString &text2) { - return diff_main(text1, text2, true); +std::vector diff_match_patch::diff_main(const std::wstring &text1, + const std::wstring &text2) { + return diff_main(text1, text2, true); } -QList diff_match_patch::diff_main(const QString &text1, - const QString &text2, bool checklines) { - // Set a deadline by which time the diff must be complete. - clock_t deadline; - if (Diff_Timeout <= 0) { - deadline = std::numeric_limits::max(); - } else { - deadline = clock() + (clock_t)(Diff_Timeout * CLOCKS_PER_SEC); - } - return diff_main(text1, text2, checklines, deadline); +std::vector diff_match_patch::diff_main(const std::wstring &text1, + const std::wstring &text2, bool checklines) { + // Set a deadline by which time the diff must be complete. + clock_t deadline; + if (Diff_Timeout <= 0) { + deadline = std::numeric_limits::max(); + } else { + deadline = clock() + (clock_t)(Diff_Timeout * CLOCKS_PER_SEC); + } + return diff_main(text1, text2, checklines, deadline); } -QList diff_match_patch::diff_main(const QString &text1, - const QString &text2, bool checklines, clock_t deadline) { - // Check for null inputs. 
- if (text1.isNull() || text2.isNull()) { - throw "Null inputs. (diff_main)"; - } +std::vector diff_match_patch::diff_main(const std::wstring &text1, + const std::wstring &text2, bool checklines, clock_t deadline) { + // Check for null inputs. + if (text1.empty() || text2.empty()) { + throw "Null inputs. (diff_main)"; + } + + // Check for equality (speedup). + std::vector diffs; + if (text1 == text2) { + if (!text1.empty()) { + diffs.push_back(Diff(EQUAL, text1)); + } + return diffs; + } + + // Trim off common prefix (speedup). + int commonlength = diff_commonPrefix(text1, text2); + const std::wstring &commonprefix = text1.substr(0, commonlength); + std::wstring textChopped1 = text1.substr(commonlength); + std::wstring textChopped2 = text2.substr(commonlength); + + // Trim off common suffix (speedup). + commonlength = diff_commonSuffix(textChopped1, textChopped2); + const std::wstring &commonsuffix = textChopped1.substr(textChopped1.length() - commonlength); + textChopped1 = textChopped1.substr(0, textChopped1.length() - commonlength); + textChopped2 = textChopped2.substr(0, textChopped2.length() - commonlength); - // Check for equality (speedup). - QList diffs; - if (text1 == text2) { - if (!text1.isEmpty()) { - diffs.append(Diff(EQUAL, text1)); + // Compute the diff on the middle block. + diffs = diff_compute(textChopped1, textChopped2, checklines, deadline); + + // Restore the prefix and suffix. + if (!commonprefix.empty()) { + diffs.insert(diffs.begin(), Diff(EQUAL, commonprefix)); + } + if (!commonsuffix.empty()) { + diffs.push_back(Diff(EQUAL, commonsuffix)); } + + diff_cleanupMerge(diffs); + return diffs; - } - - // Trim off common prefix (speedup). - int commonlength = diff_commonPrefix(text1, text2); - const QString &commonprefix = text1.left(commonlength); - QString textChopped1 = text1.mid(commonlength); - QString textChopped2 = text2.mid(commonlength); - - // Trim off common suffix (speedup). 
- commonlength = diff_commonSuffix(textChopped1, textChopped2); - const QString &commonsuffix = textChopped1.right(commonlength); - textChopped1 = textChopped1.left(textChopped1.length() - commonlength); - textChopped2 = textChopped2.left(textChopped2.length() - commonlength); - - // Compute the diff on the middle block. - diffs = diff_compute(textChopped1, textChopped2, checklines, deadline); - - // Restore the prefix and suffix. - if (!commonprefix.isEmpty()) { - diffs.prepend(Diff(EQUAL, commonprefix)); - } - if (!commonsuffix.isEmpty()) { - diffs.append(Diff(EQUAL, commonsuffix)); - } - - diff_cleanupMerge(diffs); - - return diffs; } -QList diff_match_patch::diff_compute(QString text1, QString text2, - bool checklines, clock_t deadline) { - QList diffs; +std::vector diff_match_patch::diff_compute(std::wstring text1, std::wstring text2, + bool checklines, clock_t deadline) { + std::vector diffs; - if (text1.isEmpty()) { - // Just add some text (speedup). - diffs.append(Diff(INSERT, text2)); - return diffs; - } + if (text1.empty()) { + // Just add some text (speedup). + diffs.push_back(Diff(INSERT, text2)); + return diffs; + } - if (text2.isEmpty()) { - // Just delete some text (speedup). - diffs.append(Diff(DELETE, text1)); - return diffs; - } - - { - const QString longtext = text1.length() > text2.length() ? text1 : text2; - const QString shorttext = text1.length() > text2.length() ? text2 : text1; - const int i = longtext.indexOf(shorttext); - if (i != -1) { - // Shorter text is inside the longer text (speedup). - const Operation op = (text1.length() > text2.length()) ? DELETE : INSERT; - diffs.append(Diff(op, longtext.left(i))); - diffs.append(Diff(EQUAL, shorttext)); - diffs.append(Diff(op, safeMid(longtext, i + shorttext.length()))); - return diffs; - } - - if (shorttext.length() == 1) { - // Single character string. - // After the previous speedup, the character can't be an equality. 
- diffs.append(Diff(DELETE, text1)); - diffs.append(Diff(INSERT, text2)); - return diffs; - } - // Garbage collect longtext and shorttext by scoping out. - } - - // Check to see if the problem can be split in two. - const QStringList hm = diff_halfMatch(text1, text2); - if (hm.count() > 0) { - // A half-match was found, sort out the return data. - const QString text1_a = hm[0]; - const QString text1_b = hm[1]; - const QString text2_a = hm[2]; - const QString text2_b = hm[3]; - const QString mid_common = hm[4]; - // Send both pairs off for separate processing. - const QList diffs_a = diff_main(text1_a, text2_a, - checklines, deadline); - const QList diffs_b = diff_main(text1_b, text2_b, - checklines, deadline); - // Merge the results. - diffs = diffs_a; - diffs.append(Diff(EQUAL, mid_common)); - diffs += diffs_b; - return diffs; - } + if (text2.empty()) { + // Just delete some text (speedup). + diffs.push_back(Diff(DELETE, text1)); + return diffs; + } - // Perform a real diff. - if (checklines && text1.length() > 100 && text2.length() > 100) { - return diff_lineMode(text1, text2, deadline); - } + { + const std::wstring longtext = text1.length() > text2.length() ? text1 : text2; + const std::wstring shorttext = text1.length() > text2.length() ? text2 : text1; + const auto i = longtext.find(shorttext); + if (i != std::wstring::npos) { + // Shorter text is inside the longer text (speedup). + const Operation op = (text1.length() > text2.length()) ? DELETE : INSERT; + diffs.push_back(Diff(op, longtext.substr(0, i))); + diffs.push_back(Diff(EQUAL, shorttext)); + diffs.push_back(Diff(op, safeMid(longtext, i + shorttext.length()))); + return diffs; + } - return diff_bisect(text1, text2, deadline); + if (shorttext.length() == 1) { + // Single character string. + // After the previous speedup, the character can't be an equality. 
+ diffs.push_back(Diff(DELETE, text1)); + diffs.push_back(Diff(INSERT, text2)); + return diffs; + } + // Garbage collect longtext and shorttext by scoping out. + } + + // Check to see if the problem can be split in two. + const std::wstring_list hm = diff_halfMatch(text1, text2); + if (hm.size() > 0) { + // A half-match was found, sort out the return data. + const std::wstring text1_a = hm[0]; + const std::wstring text1_b = hm[1]; + const std::wstring text2_a = hm[2]; + const std::wstring text2_b = hm[3]; + const std::wstring mid_common = hm[4]; + // Send both pairs off for separate processing. + const std::vector diffs_a = diff_main(text1_a, text2_a, + checklines, deadline); + const std::vector diffs_b = diff_main(text1_b, text2_b, + checklines, deadline); + // Merge the results. + diffs = diffs_a; + diffs.push_back(Diff(EQUAL, mid_common)); + diffs.insert(std::end(diffs), std::begin(diffs_b), std::end(diffs_b)); + return diffs; + } + + // Perform a real diff. + if (checklines && text1.length() > 100 && text2.length() > 100) { + return diff_lineMode(text1, text2, deadline); + } + + return diff_bisect(text1, text2, deadline); } -QList diff_match_patch::diff_lineMode(QString text1, QString text2, - clock_t deadline) { - // Scan the text on a line-by-line basis first. - const QList b = diff_linesToChars(text1, text2); - text1 = b[0].toString(); - text2 = b[1].toString(); - QStringList linearray = b[2].toStringList(); - - QList diffs = diff_main(text1, text2, false, deadline); - - // Convert the diff back to original text. - diff_charsToLines(diffs, linearray); - // Eliminate freak matches (e.g. blank lines) - diff_cleanupSemantic(diffs); - - // Rediff any replacement blocks, this time character-by-character. - // Add a dummy entry at the end. - diffs.append(Diff(EQUAL, "")); - int count_delete = 0; - int count_insert = 0; - QString text_delete = ""; - QString text_insert = ""; - - QMutableListIterator pointer(diffs); - Diff *thisDiff = pointer.hasNext() ? 
&pointer.next() : NULL; - while (thisDiff != NULL) { - switch (thisDiff->operation) { - case INSERT: - count_insert++; - text_insert += thisDiff->text; - break; - case DELETE: - count_delete++; - text_delete += thisDiff->text; - break; - case EQUAL: - // Upon reaching an equality, check for prior redundancies. - if (count_delete >= 1 && count_insert >= 1) { - // Delete the offending records and add the merged ones. - pointer.previous(); - for (int j = 0; j < count_delete + count_insert; j++) { - pointer.previous(); - pointer.remove(); - } - foreach(Diff newDiff, - diff_main(text_delete, text_insert, false, deadline)) { - pointer.insert(newDiff); - } +std::vector diff_match_patch::diff_lineMode(std::wstring text1, std::wstring text2, + clock_t deadline) { + // Scan the text on a line-by-line basis first. + const std::vector b = diff_linesToChars(text1, text2); + text1 = std::get(b[0]); + text2 = std::get(b[1]); + std::wstring_list linearray = std::get(b[2]); + + std::vector diffs = diff_main(text1, text2, false, deadline); + + // Convert the diff back to original text. + diff_charsToLines(diffs, linearray); + // Eliminate freak matches (e.g. blank lines) + diff_cleanupSemantic(diffs); + + // Rediff any replacement blocks, this time character-by-character. + // Add a dummy entry at the end. + diffs.push_back(Diff(EQUAL, L"")); + int count_delete = 0; + int count_insert = 0; + std::wstring text_delete = L""; + std::wstring text_insert = L""; + + std::vector::iterator pointer = diffs.begin(); + while (pointer != diffs.end()) { + Diff *thisDiff = &(*pointer); + switch (thisDiff->operation) { + case INSERT: + count_insert++; + text_insert += thisDiff->text; + break; + case DELETE: + count_delete++; + text_delete += thisDiff->text; + break; + case EQUAL: + // Upon reaching an equality, check for prior redundancies. + if (count_delete >= 1 && count_insert >= 1) { + // Delete the offending records and add the merged ones. 
+ std::prev(pointer); + for (int j = 0; j < count_delete + count_insert; j++) { + diffs.erase(pointer - 1); + } + for(Diff newDiff: + diff_main(text_delete, text_insert, false, deadline)) { + diffs.insert(pointer, newDiff); + } + } + count_insert = 0; + count_delete = 0; + text_delete = L""; + text_insert = L""; + break; } - count_insert = 0; - count_delete = 0; - text_delete = ""; - text_insert = ""; - break; + + pointer++; } - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - } - diffs.removeLast(); // Remove the dummy entry at the end. + diffs.erase(diffs.end() - 1); - return diffs; + return diffs; } -QList diff_match_patch::diff_bisect(const QString &text1, - const QString &text2, clock_t deadline) { - // Cache the text lengths to prevent multiple calls. - const int text1_length = text1.length(); - const int text2_length = text2.length(); - const int max_d = (text1_length + text2_length + 1) / 2; - const int v_offset = max_d; - const int v_length = 2 * max_d; - int *v1 = new int[v_length]; - int *v2 = new int[v_length]; - for (int x = 0; x < v_length; x++) { - v1[x] = -1; - v2[x] = -1; - } - v1[v_offset + 1] = 0; - v2[v_offset + 1] = 0; - const int delta = text1_length - text2_length; - // If the total number of characters is odd, then the front path will - // collide with the reverse path. - const bool front = (delta % 2 != 0); - // Offsets for start and end of k loop. - // Prevents mapping of space beyond the grid. - int k1start = 0; - int k1end = 0; - int k2start = 0; - int k2end = 0; - for (int d = 0; d < max_d; d++) { - // Bail out if deadline is reached. - if (clock() > deadline) { - break; - } - - // Walk the front path one step. 
- for (int k1 = -d + k1start; k1 <= d - k1end; k1 += 2) { - const int k1_offset = v_offset + k1; - int x1; - if (k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1])) { - x1 = v1[k1_offset + 1]; - } else { - x1 = v1[k1_offset - 1] + 1; - } - int y1 = x1 - k1; - while (x1 < text1_length && y1 < text2_length - && text1[x1] == text2[y1]) { - x1++; - y1++; - } - v1[k1_offset] = x1; - if (x1 > text1_length) { - // Ran off the right of the graph. - k1end += 2; - } else if (y1 > text2_length) { - // Ran off the bottom of the graph. - k1start += 2; - } else if (front) { - int k2_offset = v_offset + delta - k1; - if (k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1) { - // Mirror x2 onto top-left coordinate system. - int x2 = text1_length - v2[k2_offset]; - if (x1 >= x2) { - // Overlap detected. - delete [] v1; - delete [] v2; - return diff_bisectSplit(text1, text2, x1, y1, deadline); - } +std::vector diff_match_patch::diff_bisect(const std::wstring &text1, + const std::wstring &text2, clock_t deadline) { + // Cache the text lengths to prevent multiple calls. + const int text1_length = text1.length(); + const int text2_length = text2.length(); + const int max_d = (text1_length + text2_length + 1) / 2; + const int v_offset = max_d; + const int v_length = 2 * max_d; + int *v1 = new int[v_length]; + int *v2 = new int[v_length]; + for (int x = 0; x < v_length; x++) { + v1[x] = -1; + v2[x] = -1; + } + v1[v_offset + 1] = 0; + v2[v_offset + 1] = 0; + const int delta = text1_length - text2_length; + // If the total number of characters is odd, then the front path will + // collide with the reverse path. + const bool front = (delta % 2 != 0); + // Offsets for start and end of k loop. + // Prevents mapping of space beyond the grid. + int k1start = 0; + int k1end = 0; + int k2start = 0; + int k2end = 0; + for (int d = 0; d < max_d; d++) { + // Bail out if deadline is reached. + if (clock() > deadline) { + break; } - } - } - - // Walk the reverse path one step. 
- for (int k2 = -d + k2start; k2 <= d - k2end; k2 += 2) { - const int k2_offset = v_offset + k2; - int x2; - if (k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1])) { - x2 = v2[k2_offset + 1]; - } else { - x2 = v2[k2_offset - 1] + 1; - } - int y2 = x2 - k2; - while (x2 < text1_length && y2 < text2_length - && text1[text1_length - x2 - 1] == text2[text2_length - y2 - 1]) { - x2++; - y2++; - } - v2[k2_offset] = x2; - if (x2 > text1_length) { - // Ran off the left of the graph. - k2end += 2; - } else if (y2 > text2_length) { - // Ran off the top of the graph. - k2start += 2; - } else if (!front) { - int k1_offset = v_offset + delta - k2; - if (k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) { - int x1 = v1[k1_offset]; - int y1 = v_offset + x1 - k1_offset; - // Mirror x2 onto top-left coordinate system. - x2 = text1_length - x2; - if (x1 >= x2) { - // Overlap detected. - delete [] v1; - delete [] v2; - return diff_bisectSplit(text1, text2, x1, y1, deadline); - } + + // Walk the front path one step. + for (int k1 = -d + k1start; k1 <= d - k1end; k1 += 2) { + const int k1_offset = v_offset + k1; + int x1; + if (k1 == -d || (k1 != d && v1[k1_offset - 1] < v1[k1_offset + 1])) { + x1 = v1[k1_offset + 1]; + } else { + x1 = v1[k1_offset - 1] + 1; + } + int y1 = x1 - k1; + while (x1 < text1_length && y1 < text2_length + && text1[x1] == text2[y1]) { + x1++; + y1++; + } + v1[k1_offset] = x1; + if (x1 > text1_length) { + // Ran off the right of the graph. + k1end += 2; + } else if (y1 > text2_length) { + // Ran off the bottom of the graph. + k1start += 2; + } else if (front) { + int k2_offset = v_offset + delta - k1; + if (k2_offset >= 0 && k2_offset < v_length && v2[k2_offset] != -1) { + // Mirror x2 onto top-left coordinate system. + int x2 = text1_length - v2[k2_offset]; + if (x1 >= x2) { + // Overlap detected. 
+ delete [] v1; + delete [] v2; + return diff_bisectSplit(text1, text2, x1, y1, deadline); + } + } + } + } + + // Walk the reverse path one step. + for (int k2 = -d + k2start; k2 <= d - k2end; k2 += 2) { + const int k2_offset = v_offset + k2; + int x2; + if (k2 == -d || (k2 != d && v2[k2_offset - 1] < v2[k2_offset + 1])) { + x2 = v2[k2_offset + 1]; + } else { + x2 = v2[k2_offset - 1] + 1; + } + int y2 = x2 - k2; + while (x2 < text1_length && y2 < text2_length + && text1[text1_length - x2 - 1] == text2[text2_length - y2 - 1]) { + x2++; + y2++; + } + v2[k2_offset] = x2; + if (x2 > text1_length) { + // Ran off the left of the graph. + k2end += 2; + } else if (y2 > text2_length) { + // Ran off the top of the graph. + k2start += 2; + } else if (!front) { + int k1_offset = v_offset + delta - k2; + if (k1_offset >= 0 && k1_offset < v_length && v1[k1_offset] != -1) { + int x1 = v1[k1_offset]; + int y1 = v_offset + x1 - k1_offset; + // Mirror x2 onto top-left coordinate system. + x2 = text1_length - x2; + if (x1 >= x2) { + // Overlap detected. + delete [] v1; + delete [] v2; + return diff_bisectSplit(text1, text2, x1, y1, deadline); + } + } + } } - } - } - } - delete [] v1; - delete [] v2; - // Diff took too long and hit the deadline or - // number of diffs equals number of characters, no commonality at all. - QList diffs; - diffs.append(Diff(DELETE, text1)); - diffs.append(Diff(INSERT, text2)); - return diffs; + } + delete [] v1; + delete [] v2; + // Diff took too long and hit the deadline or + // number of diffs equals number of characters, no commonality at all. 
+ std::vector diffs; + diffs.push_back(Diff(DELETE, text1)); + diffs.push_back(Diff(INSERT, text2)); + return diffs; } -QList diff_match_patch::diff_bisectSplit(const QString &text1, - const QString &text2, int x, int y, clock_t deadline) { - QString text1a = text1.left(x); - QString text2a = text2.left(y); - QString text1b = safeMid(text1, x); - QString text2b = safeMid(text2, y); +std::vector diff_match_patch::diff_bisectSplit(const std::wstring &text1, + const std::wstring &text2, int x, int y, clock_t deadline) { + std::wstring text1a = text1.substr(0, x); + std::wstring text2a = text2.substr(0, y); + std::wstring text1b = safeMid(text1, x); + std::wstring text2b = safeMid(text2, y); + + // Compute both diffs serially. + std::vector diffs = diff_main(text1a, text2a, false, deadline); + std::vector diffsb = diff_main(text1b, text2b, false, deadline); - // Compute both diffs serially. - QList diffs = diff_main(text1a, text2a, false, deadline); - QList diffsb = diff_main(text1b, text2b, false, deadline); + diffs.insert(std::end(diffs), std::begin(diffsb), std::end(diffsb)); - return diffs + diffsb; + return diffs; } -QList diff_match_patch::diff_linesToChars(const QString &text1, - const QString &text2) { - QStringList lineArray; - QMap lineHash; - // e.g. linearray[4] == "Hello\n" - // e.g. linehash.get("Hello\n") == 4 - - // "\x00" is a valid character, but various debuggers don't like it. - // So we'll insert a junk entry to avoid generating a null character. 
- lineArray.append(""); - - const QString chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash); - const QString chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash); - - QList listRet; - listRet.append(QVariant::fromValue(chars1)); - listRet.append(QVariant::fromValue(chars2)); - listRet.append(QVariant::fromValue(lineArray)); - return listRet; +std::vector diff_match_patch::diff_linesToChars(const std::wstring &text1, + const std::wstring &text2) { + std::wstring_list lineArray; + std::unordered_map lineHash; + // e.g. linearray[4] == L"Hello\n" + // e.g. linehash.get("Hello\n") == 4 + + // "\x00" is a valid character, but various debuggers don't like it. + // So we'll insert a junk entry to avoid generating a null character. + lineArray.push_back(L""); + + const std::wstring chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash); + const std::wstring chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash); + + std::vector listRet; + listRet.push_back(std::dmp_variant{chars1}); + listRet.push_back(std::dmp_variant{chars2}); + listRet.push_back(std::dmp_variant{lineArray}); + return listRet; } -QString diff_match_patch::diff_linesToCharsMunge(const QString &text, - QStringList &lineArray, - QMap &lineHash) { - int lineStart = 0; - int lineEnd = -1; - QString line; - QString chars; - // Walk the text, pulling out a substring for each line. - // text.split('\n') would would temporarily double our memory footprint. - // Modifying text would create many large strings to garbage collect. 
- while (lineEnd < text.length() - 1) { - lineEnd = text.indexOf('\n', lineStart); - if (lineEnd == -1) { - lineEnd = text.length() - 1; - } - line = safeMid(text, lineStart, lineEnd + 1 - lineStart); - lineStart = lineEnd + 1; - - if (lineHash.contains(line)) { - chars += QChar(static_cast(lineHash.value(line))); - } else { - lineArray.append(line); - lineHash.insert(line, lineArray.size() - 1); - chars += QChar(static_cast(lineArray.size() - 1)); +std::wstring diff_match_patch::diff_linesToCharsMunge(const std::wstring &text, + std::wstring_list &lineArray, + std::unordered_map &lineHash) { + int lineStart = 0; + int lineEnd = -1; + std::wstring line; + std::wstring chars; + // Walk the text, pulling out a substring for each line. + // text.split('\n') would would temporarily double our memory footprint. + // Modifying text would create many large strings to garbage collect. + while (lineEnd < text.length() - 1) { + lineEnd = text.find(L'\n', lineStart); + if (lineEnd == -1) { + lineEnd = text.length() - 1; + } + line = safeMid(text, lineStart, lineEnd + 1 - lineStart); + lineStart = lineEnd + 1; + + if (lineHash.find(line) != lineHash.end()) { + chars += char(static_cast(lineHash[line])); + } else { + lineArray.push_back(line); + lineHash.emplace(line, lineArray.size() - 1); + chars += char(static_cast(lineArray.size() - 1)); + } } - } - return chars; + return chars; } -void diff_match_patch::diff_charsToLines(QList &diffs, - const QStringList &lineArray) { - // Qt has no mutable foreach construct. 
- QMutableListIterator i(diffs); - while (i.hasNext()) { - Diff &diff = i.next(); - QString text; - for (int y = 0; y < diff.text.length(); y++) { - text += lineArray.value(static_cast(diff.text[y].unicode())); +void diff_match_patch::diff_charsToLines(std::vector &diffs, + const std::wstring_list &lineArray) { + std::vector::iterator i = diffs.begin(); + while (i != diffs.end()) { + Diff &diff = *i; + std::wstring text; + for (int y = 0; y < diff.text.length(); y++) { + text += lineArray.at(static_cast(diff.text[y])); + } + diff.text = text; + i++; } - diff.text = text; - } } -int diff_match_patch::diff_commonPrefix(const QString &text1, - const QString &text2) { - // Performance analysis: http://neil.fraser.name/news/2007/10/09/ - const int n = std::min(text1.length(), text2.length()); - for (int i = 0; i < n; i++) { - if (text1[i] != text2[i]) { - return i; +int diff_match_patch::diff_commonPrefix(const std::wstring &text1, + const std::wstring &text2) { + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const int n = std::min(text1.length(), text2.length()); + for (int i = 0; i < n; i++) { + if (text1[i] != text2[i]) { + return i; + } } - } - return n; + return n; } -int diff_match_patch::diff_commonSuffix(const QString &text1, - const QString &text2) { - // Performance analysis: http://neil.fraser.name/news/2007/10/09/ - const int text1_length = text1.length(); - const int text2_length = text2.length(); - const int n = std::min(text1_length, text2_length); - for (int i = 1; i <= n; i++) { - if (text1[text1_length - i] != text2[text2_length - i]) { - return i - 1; +int diff_match_patch::diff_commonSuffix(const std::wstring &text1, + const std::wstring &text2) { + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const int text1_length = text1.length(); + const int text2_length = text2.length(); + const int n = std::min(text1_length, text2_length); + for (int i = 1; i <= n; i++) { + if (text1[text1_length - i] != 
text2[text2_length - i]) { + return i - 1; + } } - } - return n; + return n; } -int diff_match_patch::diff_commonOverlap(const QString &text1, - const QString &text2) { - // Cache the text lengths to prevent multiple calls. - const int text1_length = text1.length(); - const int text2_length = text2.length(); - // Eliminate the null case. - if (text1_length == 0 || text2_length == 0) { - return 0; - } - // Truncate the longer string. - QString text1_trunc = text1; - QString text2_trunc = text2; - if (text1_length > text2_length) { - text1_trunc = text1.right(text2_length); - } else if (text1_length < text2_length) { - text2_trunc = text2.left(text1_length); - } - const int text_length = std::min(text1_length, text2_length); - // Quick check for the worst case. - if (text1_trunc == text2_trunc) { - return text_length; - } - - // Start by looking for a single character match - // and increase length until no match is found. - // Performance analysis: http://neil.fraser.name/news/2010/11/04/ - int best = 0; - int length = 1; - while (true) { - QString pattern = text1_trunc.right(length); - int found = text2_trunc.indexOf(pattern); - if (found == -1) { - return best; - } - length += found; - if (found == 0 || text1_trunc.right(length) == text2_trunc.left(length)) { - best = length; - length++; - } - } -} +int diff_match_patch::diff_commonOverlap(const std::wstring &text1, + const std::wstring &text2) { + // Cache the text lengths to prevent multiple calls. + const int text1_length = text1.length(); + const int text2_length = text2.length(); + // Eliminate the null case. + if (text1_length == 0 || text2_length == 0) { + return 0; + } + // Truncate the longer string. 
+ std::wstring text1_trunc = text1; + std::wstring text2_trunc = text2; + if (text1_length > text2_length) { + text1_trunc = text1.substr(text1.length() - text2_length); + } else if (text1_length < text2_length) { + text2_trunc = text2.substr(0, text1_length); + } + const int text_length = std::min(text1_length, text2_length); + // Quick check for the worst case. + if (text1_trunc == text2_trunc) { + return text_length; + } -QStringList diff_match_patch::diff_halfMatch(const QString &text1, - const QString &text2) { - if (Diff_Timeout <= 0) { - // Don't risk returning a non-optimal diff if we have unlimited time. - return QStringList(); - } - const QString longtext = text1.length() > text2.length() ? text1 : text2; - const QString shorttext = text1.length() > text2.length() ? text2 : text1; - if (longtext.length() < 4 || shorttext.length() * 2 < longtext.length()) { - return QStringList(); // Pointless. - } - - // First check if the second quarter is the seed for a half-match. - const QStringList hm1 = diff_halfMatchI(longtext, shorttext, - (longtext.length() + 3) / 4); - // Check again based on the third quarter. - const QStringList hm2 = diff_halfMatchI(longtext, shorttext, - (longtext.length() + 1) / 2); - QStringList hm; - if (hm1.isEmpty() && hm2.isEmpty()) { - return QStringList(); - } else if (hm2.isEmpty()) { - hm = hm1; - } else if (hm1.isEmpty()) { - hm = hm2; - } else { - // Both matched. Select the longest. - hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2; - } - - // A half-match was found, sort out the return data. - if (text1.length() > text2.length()) { - return hm; - } else { - QStringList listRet; - listRet << hm[2] << hm[3] << hm[0] << hm[1] << hm[4]; - return listRet; - } + // Start by looking for a single character match + // and increase length until no match is found. 
+ // Performance analysis: http://neil.fraser.name/news/2010/11/04/ + int best = 0; + int length = 1; + while (true) { + std::wstring pattern = text1_trunc.substr(text1_trunc.length() - length); + int found = text2_trunc.find(pattern); + if (found == std::wstring::npos) { + return best; + } + length += found; + if (found == 0 || text1_trunc.substr(text1_trunc.length() - length) == text2_trunc.substr(0, length)) { + best = length; + length++; + } + } } +std::wstring_list diff_match_patch::diff_halfMatch(const std::wstring &text1, + const std::wstring &text2) { + if (Diff_Timeout <= 0) { + // Don't risk returning a non-optimal diff if we have unlimited time. + return std::wstring_list(); + } + const std::wstring longtext = text1.length() > text2.length() ? text1 : text2; + const std::wstring shorttext = text1.length() > text2.length() ? text2 : text1; + if (longtext.length() < 4 || shorttext.length() * 2 < longtext.length()) { + return std::wstring_list(); // Pointless. + } -QStringList diff_match_patch::diff_halfMatchI(const QString &longtext, - const QString &shorttext, - int i) { - // Start with a 1/4 length substring at position i as a seed. 
- const QString seed = safeMid(longtext, i, longtext.length() / 4); - int j = -1; - QString best_common; - QString best_longtext_a, best_longtext_b; - QString best_shorttext_a, best_shorttext_b; - while ((j = shorttext.indexOf(seed, j + 1)) != -1) { - const int prefixLength = diff_commonPrefix(safeMid(longtext, i), - safeMid(shorttext, j)); - const int suffixLength = diff_commonSuffix(longtext.left(i), - shorttext.left(j)); - if (best_common.length() < suffixLength + prefixLength) { - best_common = safeMid(shorttext, j - suffixLength, suffixLength) - + safeMid(shorttext, j, prefixLength); - best_longtext_a = longtext.left(i - suffixLength); - best_longtext_b = safeMid(longtext, i + prefixLength); - best_shorttext_a = shorttext.left(j - suffixLength); - best_shorttext_b = safeMid(shorttext, j + prefixLength); - } - } - if (best_common.length() * 2 >= longtext.length()) { - QStringList listRet; - listRet << best_longtext_a << best_longtext_b << best_shorttext_a - << best_shorttext_b << best_common; - return listRet; - } else { - return QStringList(); - } + // First check if the second quarter is the seed for a half-match. + const std::wstring_list hm1 = diff_halfMatchI(longtext, shorttext, + (longtext.length() + 3) / 4); + // Check again based on the third quarter. + const std::wstring_list hm2 = diff_halfMatchI(longtext, shorttext, + (longtext.length() + 1) / 2); + std::wstring_list hm; + if (hm1.empty() && hm2.empty()) { + return std::wstring_list(); + } else if (hm2.empty()) { + hm = hm1; + } else if (hm1.empty()) { + hm = hm2; + } else { + // Both matched. Select the longest. + hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2; + } + + // A half-match was found, sort out the return data. 
+ if (text1.length() > text2.length()) { + return hm; + } else { + std::wstring_list listRet; + listRet.push_back(hm[2]); + listRet.push_back(hm[3]); + listRet.push_back(hm[0]); + listRet.push_back(hm[1]); + listRet.push_back(hm[4]); + return listRet; + } } -void diff_match_patch::diff_cleanupSemantic(QList &diffs) { - if (diffs.isEmpty()) { - return; - } - bool changes = false; - QStack equalities; // Stack of equalities. - QString lastequality; // Always equal to equalities.lastElement().text - QMutableListIterator pointer(diffs); - // Number of characters that changed prior to the equality. - int length_insertions1 = 0; - int length_deletions1 = 0; - // Number of characters that changed after the equality. - int length_insertions2 = 0; - int length_deletions2 = 0; - Diff *thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - while (thisDiff != NULL) { - if (thisDiff->operation == EQUAL) { - // Equality found. - equalities.push(*thisDiff); - length_insertions1 = length_insertions2; - length_deletions1 = length_deletions2; - length_insertions2 = 0; - length_deletions2 = 0; - lastequality = thisDiff->text; - } else { - // An insertion or deletion. - if (thisDiff->operation == INSERT) { - length_insertions2 += thisDiff->text.length(); - } else { - length_deletions2 += thisDiff->text.length(); - } - // Eliminate an equality that is smaller or equal to the edits on both - // sides of it. - if (!lastequality.isNull() - && (lastequality.length() - <= std::max(length_insertions1, length_deletions1)) - && (lastequality.length() - <= std::max(length_insertions2, length_deletions2))) { - // printf("Splitting: '%s'\n", qPrintable(lastequality)); - // Walk back to offending equality. - while (*thisDiff != equalities.top()) { - thisDiff = &pointer.previous(); +std::wstring_list diff_match_patch::diff_halfMatchI(const std::wstring &longtext, + const std::wstring &shorttext, + int i) { + // Start with a 1/4 length substring at position i as a seed. 
+ const std::wstring seed = safeMid(longtext, i, longtext.length() / 4); + int j = -1; + std::wstring best_common; + std::wstring best_longtext_a, best_longtext_b; + std::wstring best_shorttext_a, best_shorttext_b; + while ((j = shorttext.find(seed, j + 1)) != std::wstring::npos) { + const int prefixLength = diff_commonPrefix(safeMid(longtext, i), + safeMid(shorttext, j)); + const int suffixLength = diff_commonSuffix(longtext.substr(0, i), + shorttext.substr(0, j)); + if (best_common.length() < suffixLength + prefixLength) { + best_common = safeMid(shorttext, j - suffixLength, suffixLength) + + safeMid(shorttext, j, prefixLength); + best_longtext_a = longtext.substr(0, i - suffixLength); + best_longtext_b = safeMid(longtext, i + prefixLength); + best_shorttext_a = shorttext.substr(0, j - suffixLength); + best_shorttext_b = safeMid(shorttext, j + prefixLength); } - pointer.next(); + } + if (best_common.length() * 2 >= longtext.length()) { + std::wstring_list listRet; + listRet.push_back(best_longtext_a); + listRet.push_back(best_longtext_b); + listRet.push_back(best_shorttext_a); + listRet.push_back(best_shorttext_b); + listRet.push_back(best_common); + return listRet; + } else { + return std::wstring_list(); + } +} - // Replace equality with a delete. - pointer.setValue(Diff(DELETE, lastequality)); - // Insert a corresponding an insert. - pointer.insert(Diff(INSERT, lastequality)); - equalities.pop(); // Throw away the equality we just deleted. - if (!equalities.isEmpty()) { - // Throw away the previous equality (it needs to be reevaluated). - equalities.pop(); - } - if (equalities.isEmpty()) { - // There are no previous equalities, walk back to the start. - while (pointer.hasPrevious()) { - pointer.previous(); - } +void diff_match_patch::diff_cleanupSemantic(std::vector &diffs) { + if (diffs.empty()) { + return; + } + bool changes = false; + std::deque equalities; // Stack of equalities. 
+ std::wstring lastequality; // Always equal to equalities.lastElement().text + std::vector::iterator pointer = diffs.begin(); + // Number of characters that changed prior to the equality. + int length_insertions1 = 0; + int length_deletions1 = 0; + // Number of characters that changed after the equality. + int length_insertions2 = 0; + int length_deletions2 = 0; + while (pointer != diffs.end()) { + Diff *thisDiff = &(*pointer); + if (thisDiff->operation == EQUAL) { + // Equality found. + equalities.push_front(*thisDiff); + length_insertions1 = length_insertions2; + length_deletions1 = length_deletions2; + length_insertions2 = 0; + length_deletions2 = 0; + lastequality = thisDiff->text; } else { - // There is a safe equality we can fall back to. - thisDiff = &equalities.top(); - while (*thisDiff != pointer.previous()) { - // Intentionally empty loop. - } + // An insertion or deletion. + if (thisDiff->operation == INSERT) { + length_insertions2 += thisDiff->text.length(); + } else { + length_deletions2 += thisDiff->text.length(); + } + // Eliminate an equality that is smaller or equal to the edits on both + // sides of it. + if (!lastequality.empty() + && (lastequality.length() + <= std::max(length_insertions1, length_deletions1)) + && (lastequality.length() + <= std::max(length_insertions2, length_deletions2))) { + // printf("Splitting: '%s'\n", qPrintable(lastequality)); + // Walk back to offending equality. + while (*thisDiff != equalities.front()) { + pointer--; + thisDiff = &(*pointer); + } + pointer++; + + // Replace equality with a delete. + *pointer = Diff(DELETE, lastequality); + // Insert a corresponding an insert. + diffs.insert(pointer, Diff(INSERT, lastequality)); + + equalities.pop_front(); // Throw away the equality we just deleted. + if (!equalities.empty()) { + // Throw away the previous equality (it needs to be reevaluated). + equalities.pop_front(); + } + if (equalities.empty()) { + // There are no previous equalities, walk back to the start. 
+ pointer = diffs.begin(); + } else { + // There is a safe equality we can fall back to. + thisDiff = &(equalities.front()); + while (*thisDiff != *(pointer--)) { + // Intentionally empty loop. + } + } + + length_insertions1 = 0; // Reset the counters. + length_deletions1 = 0; + length_insertions2 = 0; + length_deletions2 = 0; + lastequality = std::wstring(); + changes = true; + } } - length_insertions1 = 0; // Reset the counters. - length_deletions1 = 0; - length_insertions2 = 0; - length_deletions2 = 0; - lastequality = QString(); - changes = true; - } + pointer++; } - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - } - // Normalize the diff. - if (changes) { - diff_cleanupMerge(diffs); - } - diff_cleanupSemanticLossless(diffs); - - // Find any overlaps between deletions and insertions. - // e.g: abcxxxxxxdef - // -> abcxxxdef - // e.g: xxxabcdefxxx - // -> defxxxabc - // Only extract an overlap if it is as big as the edit ahead or behind it. - pointer.toFront(); - Diff *prevDiff = NULL; - thisDiff = NULL; - if (pointer.hasNext()) { - prevDiff = &pointer.next(); - if (pointer.hasNext()) { - thisDiff = &pointer.next(); - } - } - while (thisDiff != NULL) { - if (prevDiff->operation == DELETE && - thisDiff->operation == INSERT) { - QString deletion = prevDiff->text; - QString insertion = thisDiff->text; - int overlap_length1 = diff_commonOverlap(deletion, insertion); - int overlap_length2 = diff_commonOverlap(insertion, deletion); - if (overlap_length1 >= overlap_length2) { - if (overlap_length1 >= deletion.length() / 2.0 || - overlap_length1 >= insertion.length() / 2.0) { - // Overlap found. Insert an equality and trim the surrounding edits. 
- pointer.previous(); - pointer.insert(Diff(EQUAL, insertion.left(overlap_length1))); - prevDiff->text = - deletion.left(deletion.length() - overlap_length1); - thisDiff->text = safeMid(insertion, overlap_length1); - // pointer.insert inserts the element before the cursor, so there is - // no need to step past the new element. + // Normalize the diff. + if (changes) { + diff_cleanupMerge(diffs); + } + diff_cleanupSemanticLossless(diffs); + + // Find any overlaps between deletions and insertions. + // e.g: abcxxxxxxdef + // -> abcxxxdef + // e.g: xxxabcdefxxx + // -> defxxxabc + // Only extract an overlap if it is as big as the edit ahead or behind it. + pointer = diffs.begin(); + Diff *prevDiff = NULL; + Diff *thisDiff = NULL; + if (pointer != diffs.end()) { + prevDiff = &(*pointer); + pointer++; + if (pointer != diffs.end()) { + thisDiff = &(*pointer); + pointer++; } - } else { - if (overlap_length2 >= deletion.length() / 2.0 || - overlap_length2 >= insertion.length() / 2.0) { - // Reverse overlap found. - // Insert an equality and swap and trim the surrounding edits. - pointer.previous(); - pointer.insert(Diff(EQUAL, deletion.left(overlap_length2))); - prevDiff->operation = INSERT; - prevDiff->text = - insertion.left(insertion.length() - overlap_length2); - thisDiff->operation = DELETE; - thisDiff->text = safeMid(deletion, overlap_length2); - // pointer.insert inserts the element before the cursor, so there is - // no need to step past the new element. + } + while (thisDiff != NULL) { + if (prevDiff->operation == DELETE && + thisDiff->operation == INSERT) { + std::wstring deletion = prevDiff->text; + std::wstring insertion = thisDiff->text; + int overlap_length1 = diff_commonOverlap(deletion, insertion); + int overlap_length2 = diff_commonOverlap(insertion, deletion); + if (overlap_length1 >= overlap_length2) { + if (overlap_length1 >= deletion.length() / 2.0 || + overlap_length1 >= insertion.length() / 2.0) { + // Overlap found. 
Insert an equality and trim the surrounding edits. + pointer--; + diffs.insert(pointer, Diff(EQUAL, insertion.substr(0, overlap_length1))); + prevDiff->text = + deletion.substr(0, deletion.length() - overlap_length1); + thisDiff->text = safeMid(insertion, overlap_length1); + // pointer.insert inserts the element before the cursor, so there is + // no need to step past the new element. + } + } else { + if (overlap_length2 >= deletion.length() / 2.0 || + overlap_length2 >= insertion.length() / 2.0) { + // Reverse overlap found. + // Insert an equality and swap and trim the surrounding edits. + pointer--; + diffs.insert(pointer, Diff(EQUAL, deletion.substr(0, overlap_length2))); + prevDiff->operation = INSERT; + prevDiff->text = + insertion.substr(0, insertion.length() - overlap_length2); + thisDiff->operation = DELETE; + thisDiff->text = safeMid(deletion, overlap_length2); + // pointer.insert inserts the element before the cursor, so there is + // no need to step past the new element. + } + } + + if (pointer != diffs.end()) + pointer++; + + thisDiff = pointer != diffs.end() ? &(*pointer) : NULL; } - } - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; + prevDiff = thisDiff; + if (pointer != diffs.end()) + pointer++; + thisDiff = pointer != diffs.end() ? &(*pointer) : NULL; } - prevDiff = thisDiff; - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - } } +namespace std { +template +T * safe_next_element(std::vector v, typename std::vector::iterator & it) { + if (it != std::prev(v.end())) return &(*std::next(it)); -void diff_match_patch::diff_cleanupSemanticLossless(QList &diffs) { - QString equality1, edit, equality2; - QString commonString; - int commonOffset; - int score, bestScore; - QString bestEquality1, bestEdit, bestEquality2; - // Create a new iterator at the start. - QMutableListIterator pointer(diffs); - Diff *prevDiff = pointer.hasNext() ? &pointer.next() : NULL; - Diff *thisDiff = pointer.hasNext() ? 
&pointer.next() : NULL; - Diff *nextDiff = pointer.hasNext() ? &pointer.next() : NULL; - - // Intentionally ignore the first and last element (don't need checking). - while (nextDiff != NULL) { - if (prevDiff->operation == EQUAL && - nextDiff->operation == EQUAL) { - // This is a single edit surrounded by equalities. - equality1 = prevDiff->text; - edit = thisDiff->text; - equality2 = nextDiff->text; - - // First, shift the edit as far left as possible. - commonOffset = diff_commonSuffix(equality1, edit); - if (commonOffset != 0) { - commonString = safeMid(edit, edit.length() - commonOffset); - equality1 = equality1.left(equality1.length() - commonOffset); - edit = commonString + edit.left(edit.length() - commonOffset); - equality2 = commonString + equality2; - } + return nullptr; +} +} - // Second, step character by character right, looking for the best fit. - bestEquality1 = equality1; - bestEdit = edit; - bestEquality2 = equality2; - bestScore = diff_cleanupSemanticScore(equality1, edit) - + diff_cleanupSemanticScore(edit, equality2); - while (!edit.isEmpty() && !equality2.isEmpty() - && edit[0] == equality2[0]) { - equality1 += edit[0]; - edit = safeMid(edit, 1) + equality2[0]; - equality2 = safeMid(equality2, 1); - score = diff_cleanupSemanticScore(equality1, edit) - + diff_cleanupSemanticScore(edit, equality2); - // The >= encourages trailing rather than leading whitespace on edits. - if (score >= bestScore) { - bestScore = score; + +void diff_match_patch::diff_cleanupSemanticLossless(std::vector &diffs) { + std::wstring equality1, edit, equality2; + std::wstring commonString; + int commonOffset; + int score, bestScore; + std::wstring bestEquality1, bestEdit, bestEquality2; + // Create a new iterator at the start. 
+ std::vector::iterator pointer = diffs.begin(); + Diff *prevDiff = std::safe_next_element(diffs, pointer); + Diff *thisDiff = std::safe_next_element(diffs, pointer); + Diff *nextDiff = std::safe_next_element(diffs, pointer); + + // Intentionally ignore the first and last element (don't need checking). + while (nextDiff != NULL) { + if (prevDiff->operation == EQUAL && + nextDiff->operation == EQUAL) { + // This is a single edit surrounded by equalities. + equality1 = prevDiff->text; + edit = thisDiff->text; + equality2 = nextDiff->text; + + // First, shift the edit as far left as possible. + commonOffset = diff_commonSuffix(equality1, edit); + if (commonOffset != 0) { + commonString = safeMid(edit, edit.length() - commonOffset); + equality1 = equality1.substr(0, equality1.length() - commonOffset); + edit = commonString + edit.substr(0, edit.length() - commonOffset); + equality2 = commonString + equality2; + } + + // Second, step character by character right, looking for the best fit. bestEquality1 = equality1; bestEdit = edit; bestEquality2 = equality2; - } - } + bestScore = diff_cleanupSemanticScore(equality1, edit) + + diff_cleanupSemanticScore(edit, equality2); + while (!edit.empty() && !equality2.empty() + && edit[0] == equality2[0]) { + equality1 += edit[0]; + edit = safeMid(edit, 1) + equality2[0]; + equality2 = safeMid(equality2, 1); + score = diff_cleanupSemanticScore(equality1, edit) + + diff_cleanupSemanticScore(edit, equality2); + // The >= encourages trailing rather than leading whitespace on edits. + if (score >= bestScore) { + bestScore = score; + bestEquality1 = equality1; + bestEdit = edit; + bestEquality2 = equality2; + } + } - if (prevDiff->text != bestEquality1) { - // We have an improvement, save it back to the diff. - if (!bestEquality1.isEmpty()) { - prevDiff->text = bestEquality1; - } else { - pointer.previous(); // Walk past nextDiff. - pointer.previous(); // Walk past thisDiff. - pointer.previous(); // Walk past prevDiff. 
- pointer.remove(); // Delete prevDiff. - pointer.next(); // Walk past thisDiff. - pointer.next(); // Walk past nextDiff. - } - thisDiff->text = bestEdit; - if (!bestEquality2.isEmpty()) { - nextDiff->text = bestEquality2; - } else { - pointer.remove(); // Delete nextDiff. - nextDiff = thisDiff; - thisDiff = prevDiff; - } + if (prevDiff->text != bestEquality1) { + // We have an improvement, save it back to the diff. + if (!bestEquality1.empty()) { + prevDiff->text = bestEquality1; + } else { + std::advance(pointer, -3); + auto v = pointer + 1; //this diff + diffs.erase(pointer); + pointer = v; //to this diff + std::next(pointer);//pass next diff + } + thisDiff->text = bestEdit; + if (!bestEquality2.empty()) { + nextDiff->text = bestEquality2; + } else { + diffs.erase(pointer); // Delete nextDiff. + nextDiff = thisDiff; + thisDiff = prevDiff; + } + } } + prevDiff = thisDiff; + thisDiff = nextDiff; + nextDiff = std::safe_next_element(diffs, pointer); } - prevDiff = thisDiff; - thisDiff = nextDiff; - nextDiff = pointer.hasNext() ? &pointer.next() : NULL; - } } -int diff_match_patch::diff_cleanupSemanticScore(const QString &one, - const QString &two) { - if (one.isEmpty() || two.isEmpty()) { - // Edges are the best. - return 6; - } - - // Each port of this function behaves slightly differently due to - // subtle differences in each language's definition of things like - // 'whitespace'. Since this function's purpose is largely cosmetic, - // the choice has been made to use each language's native features - // rather than force total conformity. 
- QChar char1 = one[one.length() - 1]; - QChar char2 = two[0]; - bool nonAlphaNumeric1 = !char1.isLetterOrNumber(); - bool nonAlphaNumeric2 = !char2.isLetterOrNumber(); - bool whitespace1 = nonAlphaNumeric1 && char1.isSpace(); - bool whitespace2 = nonAlphaNumeric2 && char2.isSpace(); - bool lineBreak1 = whitespace1 && char1.category() == QChar::Other_Control; - bool lineBreak2 = whitespace2 && char2.category() == QChar::Other_Control; - bool blankLine1 = lineBreak1 && BLANKLINEEND.indexIn(one) != -1; - bool blankLine2 = lineBreak2 && BLANKLINESTART.indexIn(two) != -1; - - if (blankLine1 || blankLine2) { - // Five points for blank lines. - return 5; - } else if (lineBreak1 || lineBreak2) { - // Four points for line breaks. - return 4; - } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) { - // Three points for end of sentences. - return 3; - } else if (whitespace1 || whitespace2) { - // Two points for whitespace. - return 2; - } else if (nonAlphaNumeric1 || nonAlphaNumeric2) { - // One point for non-alphanumeric. - return 1; - } - return 0; +int diff_match_patch::diff_cleanupSemanticScore(const std::wstring &one, + const std::wstring &two) { + if (one.empty() || two.empty()) { + // Edges are the best. + return 6; + } + + // Each port of this function behaves slightly differently due to + // subtle differences in each language's definition of things like + // 'whitespace'. Since this function's purpose is largely cosmetic, + // the choice has been made to use each language's native features + // rather than force total conformity. 
+ char char1 = one[one.length() - 1]; + char char2 = two[0]; + bool nonAlphaNumeric1 = !std::isalnum(char1); + bool nonAlphaNumeric2 = !std::isalnum(char2); + bool whitespace1 = nonAlphaNumeric1 && std::isspace(char1); + bool whitespace2 = nonAlphaNumeric2 && std::isspace(char2); + bool lineBreak1 = whitespace1 && std::iscntrl(char1); + bool lineBreak2 = whitespace2 && std::iscntrl(char2); + bool blankLine1 = lineBreak1 && std::regex_match(one, BLANKLINEEND); + bool blankLine2 = lineBreak2 && std::regex_match(two, BLANKLINESTART); + + if (blankLine1 || blankLine2) { + // Five points for blank lines. + return 5; + } else if (lineBreak1 || lineBreak2) { + // Four points for line breaks. + return 4; + } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) { + // Three points for end of sentences. + return 3; + } else if (whitespace1 || whitespace2) { + // Two points for whitespace. + return 2; + } else if (nonAlphaNumeric1 || nonAlphaNumeric2) { + // One point for non-alphanumeric. + return 1; + } + return 0; } // Define some regex patterns for matching boundaries. -QRegExp diff_match_patch::BLANKLINEEND = QRegExp("\\n\\r?\\n$"); -QRegExp diff_match_patch::BLANKLINESTART = QRegExp("^\\r?\\n\\r?\\n"); - - -void diff_match_patch::diff_cleanupEfficiency(QList &diffs) { - if (diffs.isEmpty()) { - return; - } - bool changes = false; - QStack equalities; // Stack of equalities. - QString lastequality; // Always equal to equalities.lastElement().text - QMutableListIterator pointer(diffs); - // Is there an insertion operation before the last equality. - bool pre_ins = false; - // Is there a deletion operation before the last equality. - bool pre_del = false; - // Is there an insertion operation after the last equality. - bool post_ins = false; - // Is there a deletion operation after the last equality. - bool post_del = false; - - Diff *thisDiff = pointer.hasNext() ? 
&pointer.next() : NULL; - Diff *safeDiff = thisDiff; - - while (thisDiff != NULL) { - if (thisDiff->operation == EQUAL) { - // Equality found. - if (thisDiff->text.length() < Diff_EditCost && (post_ins || post_del)) { - // Candidate found. - equalities.push(*thisDiff); - pre_ins = post_ins; - pre_del = post_del; - lastequality = thisDiff->text; - } else { - // Not a candidate, and can never become one. - equalities.clear(); - lastequality = QString(); - safeDiff = thisDiff; - } - post_ins = post_del = false; - } else { - // An insertion or deletion. - if (thisDiff->operation == DELETE) { - post_del = true; - } else { - post_ins = true; - } - /* - * Five types to be split: - * ABXYCD - * AXCD - * ABXC - * AXCD - * ABXC - */ - if (!lastequality.isNull() - && ((pre_ins && pre_del && post_ins && post_del) - || ((lastequality.length() < Diff_EditCost / 2) - && ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0) - + (post_ins ? 1 : 0) + (post_del ? 1 : 0)) == 3))) { - // printf("Splitting: '%s'\n", qPrintable(lastequality)); - // Walk back to offending equality. - while (*thisDiff != equalities.top()) { - thisDiff = &pointer.previous(); - } - pointer.next(); - - // Replace equality with a delete. - pointer.setValue(Diff(DELETE, lastequality)); - // Insert a corresponding an insert. - pointer.insert(Diff(INSERT, lastequality)); - thisDiff = &pointer.previous(); - pointer.next(); - - equalities.pop(); // Throw away the equality we just deleted. - lastequality = QString(); - if (pre_ins && pre_del) { - // No changes made which could affect previous entry, keep going. - post_ins = post_del = true; - equalities.clear(); - safeDiff = thisDiff; +std::wregex diff_match_patch::BLANKLINEEND{L"\\n\\r?\\n$"}; +std::wregex diff_match_patch::BLANKLINESTART{L"^\\r?\\n\\r?\\n"}; + + +void diff_match_patch::diff_cleanupEfficiency(std::vector &diffs) { + if (diffs.empty()) { + return; + } + bool changes = false; + std::deque equalities; // Stack of equalities. 
+ std::wstring lastequality; // Always equal to equalities.lastElement().text + std::vector::iterator pointer = diffs.begin(); + // Is there an insertion operation before the last equality. + bool pre_ins = false; + // Is there a deletion operation before the last equality. + bool pre_del = false; + // Is there an insertion operation after the last equality. + bool post_ins = false; + // Is there a deletion operation after the last equality. + bool post_del = false; + + Diff *thisDiff = std::safe_next_element(diffs, pointer); + Diff *safeDiff = thisDiff; + + while (thisDiff != NULL) { + if (thisDiff->operation == EQUAL) { + // Equality found. + if (thisDiff->text.length() < Diff_EditCost && (post_ins || post_del)) { + // Candidate found. + equalities.push_front(*thisDiff); + pre_ins = post_ins; + pre_del = post_del; + lastequality = thisDiff->text; + } else { + // Not a candidate, and can never become one. + equalities.clear(); + lastequality = std::wstring(); + safeDiff = thisDiff; + } + post_ins = post_del = false; } else { - if (!equalities.isEmpty()) { - // Throw away the previous equality (it needs to be reevaluated). - equalities.pop(); - } - if (equalities.isEmpty()) { - // There are no previous questionable equalities, - // walk back to the last known safe diff. - thisDiff = safeDiff; - } else { - // There is an equality we can fall back to. - thisDiff = &equalities.top(); - } - while (*thisDiff != pointer.previous()) { - // Intentionally empty loop. - } - post_ins = post_del = false; - } + // An insertion or deletion. + if (thisDiff->operation == DELETE) { + post_del = true; + } else { + post_ins = true; + } + /* + * Five types to be split: + * ABXYCD + * AXCD + * ABXC + * AXCD + * ABXC + */ + if (!lastequality.empty() + && ((pre_ins && pre_del && post_ins && post_del) + || ((lastequality.length() < Diff_EditCost / 2) + && ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0) + + (post_ins ? 1 : 0) + (post_del ? 
1 : 0)) == 3))) { + // printf("Splitting: '%s'\n", qPrintable(lastequality)); + // Walk back to offending equality. + while (*thisDiff != equalities.front()) { + thisDiff = &(*std::prev(pointer)); + } + std::next(pointer); + + // Replace equality with a delete. + *pointer = Diff(DELETE, lastequality); + // Insert a corresponding an insert. + diffs.insert(pointer, Diff(INSERT, lastequality)); + thisDiff = &(*std::prev(pointer)); + std::next(pointer); + + equalities.pop_front(); // Throw away the equality we just deleted. + lastequality = std::wstring(); + if (pre_ins && pre_del) { + // No changes made which could affect previous entry, keep going. + post_ins = post_del = true; + equalities.clear(); + safeDiff = thisDiff; + } else { + if (!equalities.empty()) { + // Throw away the previous equality (it needs to be reevaluated). + equalities.pop_front(); + } + if (equalities.empty()) { + // There are no previous questionable equalities, + // walk back to the last known safe diff. + thisDiff = safeDiff; + } else { + // There is an equality we can fall back to. + thisDiff = &equalities.front(); + } + while (*thisDiff != (*(--pointer))) { + // Intentionally empty loop. + } + post_ins = post_del = false; + } - changes = true; - } + changes = true; + } + } + thisDiff = std::safe_next_element(diffs, pointer); } - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - } - if (changes) { - diff_cleanupMerge(diffs); - } + if (changes) { + diff_cleanupMerge(diffs); + } } -void diff_match_patch::diff_cleanupMerge(QList &diffs) { - diffs.append(Diff(EQUAL, "")); // Add a dummy entry at the end. - QMutableListIterator pointer(diffs); - int count_delete = 0; - int count_insert = 0; - QString text_delete = ""; - QString text_insert = ""; - Diff *thisDiff = pointer.hasNext() ? 
&pointer.next() : NULL; - Diff *prevEqual = NULL; - int commonlength; - while (thisDiff != NULL) { - switch (thisDiff->operation) { - case INSERT: - count_insert++; - text_insert += thisDiff->text; - prevEqual = NULL; - break; - case DELETE: - count_delete++; - text_delete += thisDiff->text; - prevEqual = NULL; - break; - case EQUAL: - if (count_delete + count_insert > 1) { - bool both_types = count_delete != 0 && count_insert != 0; - // Delete the offending records. - pointer.previous(); // Reverse direction. - while (count_delete-- > 0) { - pointer.previous(); - pointer.remove(); - } - while (count_insert-- > 0) { - pointer.previous(); - pointer.remove(); - } - if (both_types) { - // Factor out any common prefixies. - commonlength = diff_commonPrefix(text_insert, text_delete); - if (commonlength != 0) { - if (pointer.hasPrevious()) { - thisDiff = &pointer.previous(); - if (thisDiff->operation != EQUAL) { - throw "Previous diff should have been an equality."; +void diff_match_patch::diff_cleanupMerge(std::vector &diffs) { + diffs.push_back(Diff(EQUAL, L"")); // Add a dummy entry at the end. + std::vector::iterator pointer = diffs.begin(); + int count_delete = 0; + int count_insert = 0; + std::wstring text_delete = L""; + std::wstring text_insert = L""; + Diff *thisDiff = std::safe_next_element(diffs, pointer); + Diff *prevEqual = NULL; + int commonlength; + while (thisDiff != NULL) { + switch (thisDiff->operation) { + case INSERT: + count_insert++; + text_insert += thisDiff->text; + prevEqual = NULL; + break; + case DELETE: + count_delete++; + text_delete += thisDiff->text; + prevEqual = NULL; + break; + case EQUAL: + if (count_delete + count_insert > 1) { + bool both_types = count_delete != 0 && count_insert != 0; + // Delete the offending records. + std::prev(pointer); // Reverse direction. 
+ while (count_delete-- > 0) { + diffs.erase(pointer - 1); } - thisDiff->text += text_insert.left(commonlength); - pointer.next(); - } else { - pointer.insert(Diff(EQUAL, text_insert.left(commonlength))); - } - text_insert = safeMid(text_insert, commonlength); - text_delete = safeMid(text_delete, commonlength); - } - // Factor out any common suffixies. - commonlength = diff_commonSuffix(text_insert, text_delete); - if (commonlength != 0) { - thisDiff = &pointer.next(); - thisDiff->text = safeMid(text_insert, text_insert.length() - - commonlength) + thisDiff->text; - text_insert = text_insert.left(text_insert.length() - - commonlength); - text_delete = text_delete.left(text_delete.length() - - commonlength); - pointer.previous(); + while (count_insert-- > 0) { + diffs.erase(pointer - 1); + } + if (both_types) { + // Factor out any common prefixies. + commonlength = diff_commonPrefix(text_insert, text_delete); + if (commonlength != 0) { + if (pointer != diffs.begin()) { + thisDiff = &(*std::prev(pointer)); + if (thisDiff->operation != EQUAL) { + throw "Previous diff should have been an equality."; + } + thisDiff->text += text_insert.substr(0, commonlength); + std::next(pointer); + } else { + diffs.insert(pointer, Diff(EQUAL, text_insert.substr(0, commonlength))); + } + text_insert = safeMid(text_insert, commonlength); + text_delete = safeMid(text_delete, commonlength); + } + // Factor out any common suffixies. + commonlength = diff_commonSuffix(text_insert, text_delete); + if (commonlength != 0) { + thisDiff = &(*std::next(pointer)); + thisDiff->text = safeMid(text_insert, text_insert.length() + - commonlength) + thisDiff->text; + text_insert = text_insert.substr(0, text_insert.length() + - commonlength); + text_delete = text_delete.substr(0, text_delete.length() + - commonlength); + std::prev(pointer); + } + } + // Insert the merged records. 
+ if (!text_delete.empty()) { + diffs.insert(pointer, Diff(DELETE, text_delete)); + } + if (!text_insert.empty()) { + diffs.insert(pointer, Diff(INSERT, text_insert)); + } + // Step forward to the equality. + thisDiff = std::safe_next_element(diffs, pointer); + + } else if (prevEqual != NULL) { + // Merge this equality with the previous one. + prevEqual->text += thisDiff->text; + diffs.erase(pointer++); + thisDiff = &(*std::prev(pointer)); + std::next(pointer); // Forward direction } - } - // Insert the merged records. - if (!text_delete.isEmpty()) { - pointer.insert(Diff(DELETE, text_delete)); - } - if (!text_insert.isEmpty()) { - pointer.insert(Diff(INSERT, text_insert)); - } - // Step forward to the equality. - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - - } else if (prevEqual != NULL) { - // Merge this equality with the previous one. - prevEqual->text += thisDiff->text; - pointer.remove(); - thisDiff = &pointer.previous(); - pointer.next(); // Forward direction + count_insert = 0; + count_delete = 0; + text_delete = L""; + text_insert = L""; + prevEqual = thisDiff; + break; } - count_insert = 0; - count_delete = 0; - text_delete = ""; - text_insert = ""; - prevEqual = thisDiff; - break; - } - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - } - if (diffs.back().text.isEmpty()) { - diffs.removeLast(); // Remove the dummy entry at the end. - } - - /* - * Second pass: look for single edits surrounded on both sides by equalities - * which can be shifted sideways to eliminate an equality. - * e.g: ABAC -> ABAC - */ - bool changes = false; - // Create a new iterator at the start. - // (As opposed to walking the current one back.) - pointer.toFront(); - Diff *prevDiff = pointer.hasNext() ? &pointer.next() : NULL; - thisDiff = pointer.hasNext() ? &pointer.next() : NULL; - Diff *nextDiff = pointer.hasNext() ? &pointer.next() : NULL; - - // Intentionally ignore the first and last element (don't need checking). 
- while (nextDiff != NULL) { - if (prevDiff->operation == EQUAL && - nextDiff->operation == EQUAL) { - // This is a single edit surrounded by equalities. - if (thisDiff->text.endsWith(prevDiff->text)) { - // Shift the edit over the previous equality. - thisDiff->text = prevDiff->text - + thisDiff->text.left(thisDiff->text.length() - - prevDiff->text.length()); - nextDiff->text = prevDiff->text + nextDiff->text; - pointer.previous(); // Walk past nextDiff. - pointer.previous(); // Walk past thisDiff. - pointer.previous(); // Walk past prevDiff. - pointer.remove(); // Delete prevDiff. - pointer.next(); // Walk past thisDiff. - thisDiff = &pointer.next(); // Walk past nextDiff. - nextDiff = pointer.hasNext() ? &pointer.next() : NULL; - changes = true; - } else if (thisDiff->text.startsWith(nextDiff->text)) { - // Shift the edit over the next equality. - prevDiff->text += nextDiff->text; - thisDiff->text = safeMid(thisDiff->text, nextDiff->text.length()) - + nextDiff->text; - pointer.remove(); // Delete nextDiff. - nextDiff = pointer.hasNext() ? &pointer.next() : NULL; - changes = true; + thisDiff = std::safe_next_element(diffs, pointer); + } + if (diffs.back().text.empty()) { + diffs.erase(diffs.end() - 1); + } + + /* + * Second pass: look for single edits surrounded on both sides by equalities + * which can be shifted sideways to eliminate an equality. + * e.g: ABAC -> ABAC + */ + bool changes = false; + // Create a new iterator at the start. + // (As opposed to walking the current one back.) + pointer = diffs.begin(); + Diff *prevDiff = std::safe_next_element(diffs, pointer); + thisDiff = std::safe_next_element(diffs, pointer); + Diff *nextDiff = std::safe_next_element(diffs, pointer); + + // Intentionally ignore the first and last element (don't need checking). + while (nextDiff != NULL) { + if (prevDiff->operation == EQUAL && + nextDiff->operation == EQUAL) { + // This is a single edit surrounded by equalities. 
+ if (std::ends_with(thisDiff->text, prevDiff->text)) { + // Shift the edit over the previous equality. + thisDiff->text = prevDiff->text + + thisDiff->text.substr(0, thisDiff->text.length() + - prevDiff->text.length()); + nextDiff->text = prevDiff->text + nextDiff->text; + std::advance(pointer, -3); + diffs.erase(pointer++); // Delete prevDiff. + thisDiff = std::safe_next_element(diffs, pointer); + nextDiff = std::safe_next_element(diffs, pointer); + changes = true; + } else if (std::starts_with(thisDiff->text, nextDiff->text)) { + // Shift the edit over the next equality. + prevDiff->text += nextDiff->text; + thisDiff->text = safeMid(thisDiff->text, nextDiff->text.length()) + + nextDiff->text; + diffs.erase(pointer++); // Delete nextDiff. + nextDiff = (pointer != diffs.end()) ? &(*pointer) : NULL; + changes = true; + } } + prevDiff = thisDiff; + thisDiff = nextDiff; + nextDiff = std::safe_next_element(diffs, pointer); + } + // If shifts were made, the diff needs reordering and another shift sweep. + if (changes) { + diff_cleanupMerge(diffs); } - prevDiff = thisDiff; - thisDiff = nextDiff; - nextDiff = pointer.hasNext() ? &pointer.next() : NULL; - } - // If shifts were made, the diff needs reordering and another shift sweep. - if (changes) { - diff_cleanupMerge(diffs); - } } -int diff_match_patch::diff_xIndex(const QList &diffs, int loc) { - int chars1 = 0; - int chars2 = 0; - int last_chars1 = 0; - int last_chars2 = 0; - Diff lastDiff; - foreach(Diff aDiff, diffs) { - if (aDiff.operation != INSERT) { - // Equality or deletion. - chars1 += aDiff.text.length(); - } - if (aDiff.operation != DELETE) { - // Equality or insertion. - chars2 += aDiff.text.length(); - } - if (chars1 > loc) { - // Overshot the location. - lastDiff = aDiff; - break; - } - last_chars1 = chars1; - last_chars2 = chars2; - } - if (lastDiff.operation == DELETE) { - // The location was deleted. - return last_chars2; - } - // Add the remaining character length. 
- return last_chars2 + (loc - last_chars1); +int diff_match_patch::diff_xIndex(const std::vector &diffs, int loc) { + int chars1 = 0; + int chars2 = 0; + int last_chars1 = 0; + int last_chars2 = 0; + Diff lastDiff; + for(Diff aDiff: diffs) { + if (aDiff.operation != INSERT) { + // Equality or deletion. + chars1 += aDiff.text.length(); + } + if (aDiff.operation != DELETE) { + // Equality or insertion. + chars2 += aDiff.text.length(); + } + if (chars1 > loc) { + // Overshot the location. + lastDiff = aDiff; + break; + } + last_chars1 = chars1; + last_chars2 = chars2; + } + if (lastDiff.operation == DELETE) { + // The location was deleted. + return last_chars2; + } + // Add the remaining character length. + return last_chars2 + (loc - last_chars1); } -QString diff_match_patch::diff_prettyHtml(const QList &diffs) { - QString html; - QString text; - foreach(Diff aDiff, diffs) { - text = aDiff.text; - text.replace("&", "&").replace("<", "<") - .replace(">", ">").replace("\n", "¶
"); - switch (aDiff.operation) { - case INSERT: - html += QString("") + text - + QString(""); - break; - case DELETE: - html += QString("") + text - + QString(""); - break; - case EQUAL: - html += QString("") + text + QString(""); - break; - } - } - return html; +std::wstring diff_match_patch::diff_prettyHtml(const std::vector &diffs) { + std::wstring html; + std::wstring text; + for(Diff aDiff : diffs) { + text = aDiff.text; + std::replace_all(text, L"&", L"&"); + std::replace_all(text, L"<", L"<"); + std::replace_all(text, L">", L">"); + std::replace_all(text, L"\n", L"¶
"); + switch (aDiff.operation) { + case INSERT: + html += std::wstring(L"") + text + + std::wstring(L""); + break; + case DELETE: + html += std::wstring(L"") + text + + std::wstring(L""); + break; + case EQUAL: + html += std::wstring(L"") + text + std::wstring(L""); + break; + } + } + return html; } -QString diff_match_patch::diff_text1(const QList &diffs) { - QString text; - foreach(Diff aDiff, diffs) { - if (aDiff.operation != INSERT) { - text += aDiff.text; +std::wstring diff_match_patch::diff_text1(const std::vector &diffs) { + std::wstring text; + for(Diff aDiff: diffs) { + if (aDiff.operation != INSERT) { + text += aDiff.text; + } } - } - return text; + return text; } -QString diff_match_patch::diff_text2(const QList &diffs) { - QString text; - foreach(Diff aDiff, diffs) { - if (aDiff.operation != DELETE) { - text += aDiff.text; +std::wstring diff_match_patch::diff_text2(const std::vector &diffs) { + std::wstring text; + for(Diff aDiff : diffs) { + if (aDiff.operation != DELETE) { + text += aDiff.text; + } } - } - return text; + return text; } -int diff_match_patch::diff_levenshtein(const QList &diffs) { - int levenshtein = 0; - int insertions = 0; - int deletions = 0; - foreach(Diff aDiff, diffs) { - switch (aDiff.operation) { - case INSERT: - insertions += aDiff.text.length(); - break; - case DELETE: - deletions += aDiff.text.length(); - break; - case EQUAL: - // A deletion and an insertion is one substitution. - levenshtein += std::max(insertions, deletions); - insertions = 0; - deletions = 0; - break; - } - } - levenshtein += std::max(insertions, deletions); - return levenshtein; +int diff_match_patch::diff_levenshtein(const std::vector &diffs) { + int levenshtein = 0; + int insertions = 0; + int deletions = 0; + for(Diff aDiff : diffs) { + switch (aDiff.operation) { + case INSERT: + insertions += aDiff.text.length(); + break; + case DELETE: + deletions += aDiff.text.length(); + break; + case EQUAL: + // A deletion and an insertion is one substitution. 
+ levenshtein += std::max(insertions, deletions); + insertions = 0; + deletions = 0; + break; + } + } + levenshtein += std::max(insertions, deletions); + return levenshtein; } -QString diff_match_patch::diff_toDelta(const QList &diffs) { - QString text; - foreach(Diff aDiff, diffs) { - switch (aDiff.operation) { - case INSERT: { - QString encoded = QString(QUrl::toPercentEncoding(aDiff.text, - " !~*'();/?:@&=+$,#")); - text += QString("+") + encoded + QString("\t"); - break; - } - case DELETE: - text += QString("-") + QString::number(aDiff.text.length()) - + QString("\t"); - break; - case EQUAL: - text += QString("=") + QString::number(aDiff.text.length()) - + QString("\t"); - break; - } - } - if (!text.isEmpty()) { - // Strip off trailing tab character. - text = text.left(text.length() - 1); - } - return text; +std::wstring diff_match_patch::diff_toDelta(const std::vector &diffs) { + std::wstring text; + for(Diff aDiff : diffs) { + switch (aDiff.operation) { + case INSERT: { + std::wstring encoded = std::url_encode(aDiff.text, + " !~*'();/?:@&=+$,#"); + text += std::wstring(L"+") + encoded + std::wstring(L"\t"); + break; + } + case DELETE: + text += std::wstring(L"-") + std::to_wstring(aDiff.text.length()) + + std::wstring(L"\t"); + break; + case EQUAL: + text += std::wstring(L"=") + std::to_wstring(aDiff.text.length()) + + std::wstring(L"\t"); + break; + } + } + if (!text.empty()) { + // Strip off trailing tab character. + text = text.substr(0, text.length() - 1); + } + return text; } -QList diff_match_patch::diff_fromDelta(const QString &text1, - const QString &delta) { - QList diffs; - int pointer = 0; // Cursor in text1 - QStringList tokens = delta.split("\t"); - foreach(QString token, tokens) { - if (token.isEmpty()) { - // Blank tokens are ok (from a trailing \t). - continue; - } - // Each token begins with a one character parameter which specifies the - // operation of this token (delete, insert, equality). 
- QString param = safeMid(token, 1); - switch (token[0].toAscii()) { - case '+': - param = QUrl::fromPercentEncoding(qPrintable(param)); - diffs.append(Diff(INSERT, param)); - break; - case '-': - // Fall through. - case '=': { - int n; - n = param.toInt(); - if (n < 0) { - throw QString("Negative number in diff_fromDelta: %1").arg(param); +std::vector diff_match_patch::diff_fromDelta(const std::wstring &text1, + const std::wstring &delta) { + std::vector diffs; + int pointer = 0; // Cursor in text1 + std::wstring_list tokens = std::split(delta, '\t'); + for(std::wstring token: tokens) { + if (token.empty()) { + // Blank tokens are ok (from a trailing \t). + continue; } - QString text; - text = safeMid(text1, pointer, n); - pointer += n; - if (token[0] == QChar('=')) { - diffs.append(Diff(EQUAL, text)); - } else { - diffs.append(Diff(DELETE, text)); + // Each token begins with a one character parameter which specifies the + // operation of this token (delete, insert, equality). + std::wstring param = safeMid(token, 1); + switch (token[0]) { + case '+': + param = std::url_decode(param); + diffs.push_back(Diff(INSERT, param)); + break; + case '-': + // Fall through. 
+ case '=': { + int n; + n = std::stoi(param); + if (n < 0) { + throw std::format(L"Negative number in diff_fromDelta: %ls", param.c_str()); + } + std::wstring text; + text = safeMid(text1, pointer, n); + pointer += n; + if (token[0] == char('=')) { + diffs.push_back(Diff(EQUAL, text)); + } else { + diffs.push_back(Diff(DELETE, text)); + } + break; + } + default: + throw std::format(L"Invalid diff operation in diff_fromDelta: %c", token[0]); } - break; - } - default: - throw QString("Invalid diff operation in diff_fromDelta: %1") - .arg(token[0]); - } - } - if (pointer != text1.length()) { - throw QString("Delta length (%1) smaller than source text length (%2)") - .arg(pointer).arg(text1.length()); - } - return diffs; + } + if ((std::wstring::size_type)pointer != text1.length()) { + throw std::format(L"Delta length (%d) smaller than source text length (%d)", pointer, (int)text1.length()); + } + return diffs; } - // MATCH FUNCTIONS +// MATCH FUNCTIONS -int diff_match_patch::match_main(const QString &text, const QString &pattern, +int diff_match_patch::match_main(const std::wstring &text, const std::wstring &pattern, int loc) { - // Check for null inputs. - if (text.isNull() || pattern.isNull()) { - throw "Null inputs. (match_main)"; - } - - loc = std::max(0, std::min(loc, text.length())); - if (text == pattern) { - // Shortcut (potentially not guaranteed by the algorithm) - return 0; - } else if (text.isEmpty()) { - // Nothing to match. - return -1; - } else if (loc + pattern.length() <= text.length() - && safeMid(text, loc, pattern.length()) == pattern) { - // Perfect match at the perfect spot! (Includes case of null pattern) - return loc; - } else { - // Do a fuzzy compare. - return match_bitap(text, pattern, loc); - } + // Check for null inputs. + if (text.empty() || pattern.empty()) { + throw "Null inputs. 
(match_main)"; + } + + loc = std::max((std::wstring::size_type)0, std::min((std::wstring::size_type)loc, text.length())); + if (text == pattern) { + // Shortcut (potentially not guaranteed by the algorithm) + return 0; + } else if (text.empty()) { + // Nothing to match. + return -1; + } else if (loc + pattern.length() <= text.length() + && safeMid(text, loc, pattern.length()) == pattern) { + // Perfect match at the perfect spot! (Includes case of null pattern) + return loc; + } else { + // Do a fuzzy compare. + return match_bitap(text, pattern, loc); + } } -int diff_match_patch::match_bitap(const QString &text, const QString &pattern, +int diff_match_patch::match_bitap(const std::wstring &text, const std::wstring &pattern, int loc) { - if (!(Match_MaxBits == 0 || pattern.length() <= Match_MaxBits)) { - throw "Pattern too long for this application."; - } - - // Initialise the alphabet. - QMap s = match_alphabet(pattern); - - // Highest score beyond which we give up. - double score_threshold = Match_Threshold; - // Is there a nearby exact match? (speedup) - int best_loc = text.indexOf(pattern, loc); - if (best_loc != -1) { - score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), - score_threshold); - // What about in the other direction? (speedup) - best_loc = text.lastIndexOf(pattern, loc + pattern.length()); + if (!(Match_MaxBits == 0 || pattern.length() <= Match_MaxBits)) { + throw "Pattern too long for this application."; + } + + // Initialise the alphabet. + std::unordered_map s = match_alphabet(pattern); + + // Highest score beyond which we give up. + double score_threshold = Match_Threshold; + // Is there a nearby exact match? (speedup) + int best_loc = text.find(pattern, loc); if (best_loc != -1) { - score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), - score_threshold); - } - } - - // Initialise the bit arrays. 
- int matchmask = 1 << (pattern.length() - 1); - best_loc = -1; - - int bin_min, bin_mid; - int bin_max = pattern.length() + text.length(); - int *rd; - int *last_rd = NULL; - for (int d = 0; d < pattern.length(); d++) { - // Scan for the best match; each iteration allows for one more error. - // Run a binary search to determine how far from 'loc' we can stray at - // this error level. - bin_min = 0; - bin_mid = bin_max; - while (bin_min < bin_mid) { - if (match_bitapScore(d, loc + bin_mid, loc, pattern) - <= score_threshold) { - bin_min = bin_mid; - } else { + score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), + score_threshold); + // What about in the other direction? (speedup) + best_loc = text.find_last_of(pattern, loc + pattern.length()); + if (best_loc != -1) { + score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), + score_threshold); + } + } + + // Initialise the bit arrays. + int matchmask = 1 << (pattern.length() - 1); + best_loc = -1; + + int bin_min, bin_mid; + int bin_max = pattern.length() + text.length(); + int *rd; + int *last_rd = NULL; + for (int d = 0; d < pattern.length(); d++) { + // Scan for the best match; each iteration allows for one more error. + // Run a binary search to determine how far from 'loc' we can stray at + // this error level. + bin_min = 0; + bin_mid = bin_max; + while (bin_min < bin_mid) { + if (match_bitapScore(d, loc + bin_mid, loc, pattern) + <= score_threshold) { + bin_min = bin_mid; + } else { + bin_max = bin_mid; + } + bin_mid = (bin_max - bin_min) / 2 + bin_min; + } + // Use the result from this iteration as the maximum for the next. bin_max = bin_mid; - } - bin_mid = (bin_max - bin_min) / 2 + bin_min; - } - // Use the result from this iteration as the maximum for the next. 
- bin_max = bin_mid; - int start = std::max(1, loc - bin_mid + 1); - int finish = std::min(loc + bin_mid, text.length()) + pattern.length(); - - rd = new int[finish + 2]; - rd[finish + 1] = (1 << d) - 1; - for (int j = finish; j >= start; j--) { - int charMatch; - if (text.length() <= j - 1) { - // Out of range. - charMatch = 0; - } else { - charMatch = s.value(text[j - 1], 0); - } - if (d == 0) { - // First pass: exact match. - rd[j] = ((rd[j + 1] << 1) | 1) & charMatch; - } else { - // Subsequent passes: fuzzy match. - rd[j] = ((rd[j + 1] << 1) | 1) & charMatch - | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) - | last_rd[j + 1]; - } - if ((rd[j] & matchmask) != 0) { - double score = match_bitapScore(d, j - 1, loc, pattern); - // This match will almost certainly be better than any existing - // match. But check anyway. - if (score <= score_threshold) { - // Told you so. - score_threshold = score; - best_loc = j - 1; - if (best_loc > loc) { - // When passing loc, don't exceed our current distance from loc. - start = std::max(1, 2 * loc - best_loc); - } else { - // Already passed loc, downhill from here on in. + int start = std::max(1, loc - bin_mid + 1); + int finish = std::min((std::wstring::size_type)(loc + bin_mid), text.length()) + pattern.length(); + + rd = new int[finish + 2]; + rd[finish + 1] = (1 << d) - 1; + for (int j = finish; j >= start; j--) { + int charMatch; + if (text.length() <= j - 1) { + // Out of range. + charMatch = 0; + } else { + auto it = s.find(text[j-1]); + if (it != s.end()) + charMatch = it->second; + else + charMatch = 0; + } + if (d == 0) { + // First pass: exact match. + rd[j] = ((rd[j + 1] << 1) | 1) & charMatch; + } else { + // Subsequent passes: fuzzy match. 
+ rd[j] = (((rd[j + 1] << 1) | 1) & charMatch) + | (((last_rd[j + 1] | last_rd[j]) << 1) | 1) + | last_rd[j + 1]; + } + if ((rd[j] & matchmask) != 0) { + double score = match_bitapScore(d, j - 1, loc, pattern); + // This match will almost certainly be better than any existing + // match. But check anyway. + if (score <= score_threshold) { + // Told you so. + score_threshold = score; + best_loc = j - 1; + if (best_loc > loc) { + // When passing loc, don't exceed our current distance from loc. + start = std::max(1, 2 * loc - best_loc); + } else { + // Already passed loc, downhill from here on in. + break; + } + } + } + } + if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) { + // No hope for a (better) match at greater error levels. break; - } } - } - } - if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) { - // No hope for a (better) match at greater error levels. - break; + delete [] last_rd; + last_rd = rd; } delete [] last_rd; - last_rd = rd; - } - delete [] last_rd; - delete [] rd; - return best_loc; + delete [] rd; + return best_loc; } double diff_match_patch::match_bitapScore(int e, int x, int loc, - const QString &pattern) { - const float accuracy = static_cast (e) / pattern.length(); - const int proximity = qAbs(loc - x); - if (Match_Distance == 0) { - // Dodge divide by zero error. - return proximity == 0 ? accuracy : 1.0; - } - return accuracy + (proximity / static_cast (Match_Distance)); + const std::wstring &pattern) { + const float accuracy = static_cast (e) / pattern.length(); + const int proximity = std::abs(loc - x); + if (Match_Distance == 0) { + // Dodge divide by zero error. + return proximity == 0 ? 
accuracy : 1.0; + } + return accuracy + (proximity / static_cast (Match_Distance)); } -QMap diff_match_patch::match_alphabet(const QString &pattern) { - QMap s; - int i; - for (i = 0; i < pattern.length(); i++) { - QChar c = pattern[i]; - s.insert(c, 0); - } - for (i = 0; i < pattern.length(); i++) { - QChar c = pattern[i]; - s.insert(c, s.value(c) | (1 << (pattern.length() - i - 1))); - } - return s; +std::unordered_map diff_match_patch::match_alphabet(const std::wstring &pattern) { + std::unordered_map s; + int i; + for (i = 0; i < pattern.length(); i++) { + char c = pattern[i]; + s[c] = 0; + } + for (i = 0; i < pattern.length(); i++) { + char c = pattern[i]; + s[c] = s[c] | (1 << (pattern.length() - i - 1)); + } + return s; } // PATCH FUNCTIONS -void diff_match_patch::patch_addContext(Patch &patch, const QString &text) { - if (text.isEmpty()) { - return; - } - QString pattern = safeMid(text, patch.start2, patch.length1); - int padding = 0; - - // Look for the first and last matches of pattern in text. If two different - // matches are found, increase the pattern length. - while (text.indexOf(pattern) != text.lastIndexOf(pattern) - && pattern.length() < Match_MaxBits - Patch_Margin - Patch_Margin) { +void diff_match_patch::patch_addContext(Patch &patch, const std::wstring &text) { + if (text.empty()) { + return; + } + std::wstring pattern = safeMid(text, patch.start2, patch.length1); + int padding = 0; + + // Look for the first and last matches of pattern in text. If two different + // matches are found, increase the pattern length. + while (text.find(pattern) != text.find_last_of(pattern) + && pattern.length() < Match_MaxBits - Patch_Margin - Patch_Margin) { + padding += Patch_Margin; + pattern = safeMid(text, std::max(0, patch.start2 - padding), + std::min(text.length(), (std::wstring::size_type)(patch.start2 + patch.length1 + padding)) + - std::max(0, patch.start2 - padding)); + } + // Add one chunk for good luck. 
padding += Patch_Margin; - pattern = safeMid(text, std::max(0, patch.start2 - padding), - std::min(text.length(), patch.start2 + patch.length1 + padding) - - std::max(0, patch.start2 - padding)); - } - // Add one chunk for good luck. - padding += Patch_Margin; - - // Add the prefix. - QString prefix = safeMid(text, std::max(0, patch.start2 - padding), - patch.start2 - std::max(0, patch.start2 - padding)); - if (!prefix.isEmpty()) { - patch.diffs.prepend(Diff(EQUAL, prefix)); - } - // Add the suffix. - QString suffix = safeMid(text, patch.start2 + patch.length1, - std::min(text.length(), patch.start2 + patch.length1 + padding) - - (patch.start2 + patch.length1)); - if (!suffix.isEmpty()) { - patch.diffs.append(Diff(EQUAL, suffix)); - } - - // Roll back the start points. - patch.start1 -= prefix.length(); - patch.start2 -= prefix.length(); - // Extend the lengths. - patch.length1 += prefix.length() + suffix.length(); - patch.length2 += prefix.length() + suffix.length(); + + // Add the prefix. + std::wstring prefix = safeMid(text, std::max(0, patch.start2 - padding), + patch.start2 - std::max(0, patch.start2 - padding)); + if (!prefix.empty()) { + patch.diffs.insert(patch.diffs.begin(), Diff(EQUAL, prefix)); + } + // Add the suffix. + std::wstring suffix = safeMid(text, patch.start2 + patch.length1, + std::min(text.length(), (std::wstring::size_type)(patch.start2 + patch.length1 + padding)) + - (patch.start2 + patch.length1)); + if (!suffix.empty()) { + patch.diffs.push_back(Diff(EQUAL, suffix)); + } + + // Roll back the start points. + patch.start1 -= prefix.length(); + patch.start2 -= prefix.length(); + // Extend the lengths. + patch.length1 += prefix.length() + suffix.length(); + patch.length2 += prefix.length() + suffix.length(); } -QList diff_match_patch::patch_make(const QString &text1, - const QString &text2) { - // Check for null inputs. - if (text1.isNull() || text2.isNull()) { - throw "Null inputs. 
(patch_make)"; - } +std::vector diff_match_patch::patch_make(const std::wstring &text1, + const std::wstring &text2) { + // Check for null inputs. + if (text1.empty() || text2.empty()) { + throw "Null inputs. (patch_make)"; + } - // No diffs provided, compute our own. - QList diffs = diff_main(text1, text2, true); - if (diffs.size() > 2) { - diff_cleanupSemantic(diffs); - diff_cleanupEfficiency(diffs); - } + // No diffs provided, compute our own. + std::vector diffs = diff_main(text1, text2, true); + if (diffs.size() > 2) { + diff_cleanupSemantic(diffs); + diff_cleanupEfficiency(diffs); + } - return patch_make(text1, diffs); + return patch_make(text1, diffs); } -QList diff_match_patch::patch_make(const QList &diffs) { - // No origin string provided, compute our own. - const QString text1 = diff_text1(diffs); - return patch_make(text1, diffs); +std::vector diff_match_patch::patch_make(const std::vector &diffs) { + // No origin string provided, compute our own. + const std::wstring text1 = diff_text1(diffs); + return patch_make(text1, diffs); } -QList diff_match_patch::patch_make(const QString &text1, - const QString &text2, - const QList &diffs) { - // text2 is entirely unused. - return patch_make(text1, diffs); +std::vector diff_match_patch::patch_make(const std::wstring &text1, + const std::wstring &text2, + const std::vector &diffs) { + // text2 is entirely unused. + return patch_make(text1, diffs); - Q_UNUSED(text2) + (void)text2; } -QList diff_match_patch::patch_make(const QString &text1, - const QList &diffs) { - // Check for null inputs. - if (text1.isNull()) { - throw "Null inputs. (patch_make)"; - } - - QList patches; - if (diffs.isEmpty()) { - return patches; // Get rid of the null case. - } - Patch patch; - int char_count1 = 0; // Number of characters into the text1 string. - int char_count2 = 0; // Number of characters into the text2 string. - // Start with text1 (prepatch_text) and apply the diffs until we arrive at - // text2 (postpatch_text). 
We recreate the patches one by one to determine - // context info. - QString prepatch_text = text1; - QString postpatch_text = text1; - foreach(Diff aDiff, diffs) { - if (patch.diffs.isEmpty() && aDiff.operation != EQUAL) { - // A new patch starts here. - patch.start1 = char_count1; - patch.start2 = char_count2; - } - - switch (aDiff.operation) { - case INSERT: - patch.diffs.append(aDiff); - patch.length2 += aDiff.text.length(); - postpatch_text = postpatch_text.left(char_count2) - + aDiff.text + safeMid(postpatch_text, char_count2); - break; - case DELETE: - patch.length1 += aDiff.text.length(); - patch.diffs.append(aDiff); - postpatch_text = postpatch_text.left(char_count2) - + safeMid(postpatch_text, char_count2 + aDiff.text.length()); - break; - case EQUAL: - if (aDiff.text.length() <= 2 * Patch_Margin - && !patch.diffs.isEmpty() && !(aDiff == diffs.back())) { - // Small equality inside a patch. - patch.diffs.append(aDiff); - patch.length1 += aDiff.text.length(); - patch.length2 += aDiff.text.length(); +std::vector diff_match_patch::patch_make(const std::wstring &text1, + const std::vector &diffs) { + // Check for null inputs. + if (text1.empty()) { + throw "Null inputs. (patch_make)"; + } + + std::vector patches; + if (diffs.empty()) { + return patches; // Get rid of the null case. + } + Patch patch; + int char_count1 = 0; // Number of characters into the text1 string. + int char_count2 = 0; // Number of characters into the text2 string. + // Start with text1 (prepatch_text) and apply the diffs until we arrive at + // text2 (postpatch_text). We recreate the patches one by one to determine + // context info. + std::wstring prepatch_text = text1; + std::wstring postpatch_text = text1; + for(Diff aDiff : diffs) { + if (patch.diffs.empty() && aDiff.operation != EQUAL) { + // A new patch starts here. + patch.start1 = char_count1; + patch.start2 = char_count2; } - if (aDiff.text.length() >= 2 * Patch_Margin) { - // Time for a new patch. 
- if (!patch.diffs.isEmpty()) { - patch_addContext(patch, prepatch_text); - patches.append(patch); - patch = Patch(); - // Unlike Unidiff, our patch lists have a rolling context. - // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff - // Update prepatch text & pos to reflect the application of the - // just completed patch. - prepatch_text = postpatch_text; - char_count1 = char_count2; - } + switch (aDiff.operation) { + case INSERT: + patch.diffs.push_back(aDiff); + patch.length2 += aDiff.text.length(); + postpatch_text = postpatch_text.substr(0, char_count2) + + aDiff.text + safeMid(postpatch_text, char_count2); + break; + case DELETE: + patch.length1 += aDiff.text.length(); + patch.diffs.push_back(aDiff); + postpatch_text = postpatch_text.substr(0, char_count2) + + safeMid(postpatch_text, char_count2 + aDiff.text.length()); + break; + case EQUAL: + if (aDiff.text.length() <= 2 * Patch_Margin + && !patch.diffs.empty() && !(aDiff == diffs.back())) { + // Small equality inside a patch. + patch.diffs.push_back(aDiff); + patch.length1 += aDiff.text.length(); + patch.length2 += aDiff.text.length(); + } + + if (aDiff.text.length() >= 2 * Patch_Margin) { + // Time for a new patch. + if (!patch.diffs.empty()) { + patch_addContext(patch, prepatch_text); + patches.push_back(patch); + patch = Patch(); + // Unlike Unidiff, our patch lists have a rolling context. + // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + // Update prepatch text & pos to reflect the application of the + // just completed patch. + prepatch_text = postpatch_text; + char_count1 = char_count2; + } + } + break; } - break; - } - // Update the current character count. - if (aDiff.operation != INSERT) { - char_count1 += aDiff.text.length(); + // Update the current character count. 
+ if (aDiff.operation != INSERT) { + char_count1 += aDiff.text.length(); + } + if (aDiff.operation != DELETE) { + char_count2 += aDiff.text.length(); + } } - if (aDiff.operation != DELETE) { - char_count2 += aDiff.text.length(); + // Pick up the leftover patch if not empty. + if (!patch.diffs.empty()) { + patch_addContext(patch, prepatch_text); + patches.push_back(patch); } - } - // Pick up the leftover patch if not empty. - if (!patch.diffs.isEmpty()) { - patch_addContext(patch, prepatch_text); - patches.append(patch); - } - return patches; + return patches; } -QList diff_match_patch::patch_deepCopy(QList &patches) { - QList patchesCopy; - foreach(Patch aPatch, patches) { - Patch patchCopy = Patch(); - foreach(Diff aDiff, aPatch.diffs) { - Diff diffCopy = Diff(aDiff.operation, aDiff.text); - patchCopy.diffs.append(diffCopy); - } - patchCopy.start1 = aPatch.start1; - patchCopy.start2 = aPatch.start2; - patchCopy.length1 = aPatch.length1; - patchCopy.length2 = aPatch.length2; - patchesCopy.append(patchCopy); - } - return patchesCopy; +std::vector diff_match_patch::patch_deepCopy(std::vector &patches) { + std::vector patchesCopy; + for(Patch aPatch: patches) { + Patch patchCopy = Patch(); + for(Diff aDiff: aPatch.diffs) { + Diff diffCopy = Diff(aDiff.operation, aDiff.text); + patchCopy.diffs.push_back(diffCopy); + } + patchCopy.start1 = aPatch.start1; + patchCopy.start2 = aPatch.start2; + patchCopy.length1 = aPatch.length1; + patchCopy.length2 = aPatch.length2; + patchesCopy.push_back(patchCopy); + } + return patchesCopy; } -QPair > diff_match_patch::patch_apply( - QList &patches, const QString &sourceText) { - QString text = sourceText; // Copy to preserve original. - if (patches.isEmpty()) { - return QPair >(text, QVector(0)); - } - - // Deep copy the patches so that no changes are made to originals. 
- QList patchesCopy = patch_deepCopy(patches); - - QString nullPadding = patch_addPadding(patchesCopy); - text = nullPadding + text + nullPadding; - patch_splitMax(patchesCopy); - - int x = 0; - // delta keeps track of the offset between the expected and actual location - // of the previous patch. If there are patches expected at positions 10 and - // 20, but the first patch was found at 12, delta is 2 and the second patch - // has an effective expected position of 22. - int delta = 0; - QVector results(patchesCopy.size()); - foreach(Patch aPatch, patchesCopy) { - int expected_loc = aPatch.start2 + delta; - QString text1 = diff_text1(aPatch.diffs); - int start_loc; - int end_loc = -1; - if (text1.length() > Match_MaxBits) { - // patch_splitMax will only provide an oversized pattern in the case of - // a monster delete. - start_loc = match_main(text, text1.left(Match_MaxBits), expected_loc); - if (start_loc != -1) { - end_loc = match_main(text, text1.right(Match_MaxBits), - expected_loc + text1.length() - Match_MaxBits); - if (end_loc == -1 || start_loc >= end_loc) { - // Can't find valid trailing context. Drop this patch. - start_loc = -1; - } - } - } else { - start_loc = match_main(text, text1, expected_loc); +std::pair > diff_match_patch::patch_apply( + std::vector &patches, const std::wstring &sourceText) { + std::wstring text = sourceText; // Copy to preserve original. + if (patches.empty()) { + return std::pair >(text, std::vector(0)); } - if (start_loc == -1) { - // No match found. :( - results[x] = false; - // Subtract the delta for this failed patch from subsequent patches. - delta -= aPatch.length2 - aPatch.length1; - } else { - // Found a match. 
:) - results[x] = true; - delta = start_loc - expected_loc; - QString text2; - if (end_loc == -1) { - text2 = safeMid(text, start_loc, text1.length()); - } else { - text2 = safeMid(text, start_loc, end_loc + Match_MaxBits - start_loc); - } - if (text1 == text2) { - // Perfect match, just shove the replacement text in. - text = text.left(start_loc) + diff_text2(aPatch.diffs) - + safeMid(text, start_loc + text1.length()); - } else { - // Imperfect match. Run a diff to get a framework of equivalent - // indices. - QList diffs = diff_main(text1, text2, false); - if (text1.length() > Match_MaxBits - && diff_levenshtein(diffs) / static_cast (text1.length()) - > Patch_DeleteThreshold) { - // The end points match, but the content is unacceptably bad. - results[x] = false; + + // Deep copy the patches so that no changes are made to originals. + std::vector patchesCopy = patch_deepCopy(patches); + + std::wstring nullPadding = patch_addPadding(patchesCopy); + text = nullPadding + text + nullPadding; + patch_splitMax(patchesCopy); + + int x = 0; + // delta keeps track of the offset between the expected and actual location + // of the previous patch. If there are patches expected at positions 10 and + // 20, but the first patch was found at 12, delta is 2 and the second patch + // has an effective expected position of 22. + int delta = 0; + std::vector results(patchesCopy.size()); + for(Patch aPatch: patchesCopy) { + int expected_loc = aPatch.start2 + delta; + std::wstring text1 = diff_text1(aPatch.diffs); + int start_loc; + int end_loc = -1; + if (text1.length() > Match_MaxBits) { + // patch_splitMax will only provide an oversized pattern in the case of + // a monster delete. 
+ start_loc = match_main(text, text1.substr(0, Match_MaxBits), expected_loc); + if (start_loc != -1) { + end_loc = match_main(text, text1.substr(text1.length() - Match_MaxBits), + expected_loc + text1.length() - Match_MaxBits); + if (end_loc == -1 || start_loc >= end_loc) { + // Can't find valid trailing context. Drop this patch. + start_loc = -1; + } + } + } else { + start_loc = match_main(text, text1, expected_loc); + } + if (start_loc == -1) { + // No match found. :( + results[x] = false; + // Subtract the delta for this failed patch from subsequent patches. + delta -= aPatch.length2 - aPatch.length1; } else { - diff_cleanupSemanticLossless(diffs); - int index1 = 0; - foreach(Diff aDiff, aPatch.diffs) { - if (aDiff.operation != EQUAL) { - int index2 = diff_xIndex(diffs, index1); - if (aDiff.operation == INSERT) { - // Insertion - text = text.left(start_loc + index2) + aDiff.text - + safeMid(text, start_loc + index2); - } else if (aDiff.operation == DELETE) { - // Deletion - text = text.left(start_loc + index2) - + safeMid(text, start_loc + diff_xIndex(diffs, - index1 + aDiff.text.length())); - } + // Found a match. :) + results[x] = true; + delta = start_loc - expected_loc; + std::wstring text2; + if (end_loc == -1) { + text2 = safeMid(text, start_loc, text1.length()); + } else { + text2 = safeMid(text, start_loc, end_loc + Match_MaxBits - start_loc); } - if (aDiff.operation != DELETE) { - index1 += aDiff.text.length(); + if (text1 == text2) { + // Perfect match, just shove the replacement text in. + text = text.substr(0, start_loc) + diff_text2(aPatch.diffs) + + safeMid(text, start_loc + text1.length()); + } else { + // Imperfect match. Run a diff to get a framework of equivalent + // indices. + std::vector diffs = diff_main(text1, text2, false); + if (text1.length() > Match_MaxBits + && diff_levenshtein(diffs) / static_cast (text1.length()) + > Patch_DeleteThreshold) { + // The end points match, but the content is unacceptably bad. 
+ results[x] = false; + } else { + diff_cleanupSemanticLossless(diffs); + int index1 = 0; + for(Diff aDiff: aPatch.diffs) { + if (aDiff.operation != EQUAL) { + int index2 = diff_xIndex(diffs, index1); + if (aDiff.operation == INSERT) { + // Insertion + text = text.substr(0, start_loc + index2) + aDiff.text + + safeMid(text, start_loc + index2); + } else if (aDiff.operation == DELETE) { + // Deletion + text = text.substr(0, start_loc + index2) + + safeMid(text, start_loc + diff_xIndex(diffs, + index1 + aDiff.text.length())); + } + } + if (aDiff.operation != DELETE) { + index1 += aDiff.text.length(); + } + } + } } - } } - } - } - x++; - } - // Strip the padding off. - text = safeMid(text, nullPadding.length(), text.length() - - 2 * nullPadding.length()); - return QPair >(text, results); + x++; + } + // Strip the padding off. + text = safeMid(text, nullPadding.length(), text.length() + - 2 * nullPadding.length()); + return std::pair >(text, results); } -QString diff_match_patch::patch_addPadding(QList &patches) { - short paddingLength = Patch_Margin; - QString nullPadding = ""; - for (short x = 1; x <= paddingLength; x++) { - nullPadding += QChar((ushort)x); - } - - // Bump all the patches forward. - QMutableListIterator pointer(patches); - while (pointer.hasNext()) { - Patch &aPatch = pointer.next(); - aPatch.start1 += paddingLength; - aPatch.start2 += paddingLength; - } - - // Add some padding on start of first diff. - Patch &firstPatch = patches.first(); - QList &firstPatchDiffs = firstPatch.diffs; - if (firstPatchDiffs.empty() || firstPatchDiffs.first().operation != EQUAL) { - // Add nullPadding equality. - firstPatchDiffs.prepend(Diff(EQUAL, nullPadding)); - firstPatch.start1 -= paddingLength; // Should be 0. - firstPatch.start2 -= paddingLength; // Should be 0. - firstPatch.length1 += paddingLength; - firstPatch.length2 += paddingLength; - } else if (paddingLength > firstPatchDiffs.first().text.length()) { - // Grow first equality. 
- Diff &firstDiff = firstPatchDiffs.first(); - int extraLength = paddingLength - firstDiff.text.length(); - firstDiff.text = safeMid(nullPadding, firstDiff.text.length(), - paddingLength - firstDiff.text.length()) + firstDiff.text; - firstPatch.start1 -= extraLength; - firstPatch.start2 -= extraLength; - firstPatch.length1 += extraLength; - firstPatch.length2 += extraLength; - } - - // Add some padding on end of last diff. - Patch &lastPatch = patches.first(); - QList &lastPatchDiffs = lastPatch.diffs; - if (lastPatchDiffs.empty() || lastPatchDiffs.last().operation != EQUAL) { - // Add nullPadding equality. - lastPatchDiffs.append(Diff(EQUAL, nullPadding)); - lastPatch.length1 += paddingLength; - lastPatch.length2 += paddingLength; - } else if (paddingLength > lastPatchDiffs.last().text.length()) { - // Grow last equality. - Diff &lastDiff = lastPatchDiffs.last(); - int extraLength = paddingLength - lastDiff.text.length(); - lastDiff.text += nullPadding.left(extraLength); - lastPatch.length1 += extraLength; - lastPatch.length2 += extraLength; - } - - return nullPadding; +std::wstring diff_match_patch::patch_addPadding(std::vector &patches) { + short paddingLength = Patch_Margin; + std::wstring nullPadding = L""; + for (short x = 1; x <= paddingLength; x++) { + nullPadding += wchar_t((unsigned short)x); + } + + // Bump all the patches forward. + auto pointer = patches.begin(); + while (pointer != patches.end()) { + Patch &aPatch = *pointer; + aPatch.start1 += paddingLength; + aPatch.start2 += paddingLength; + } + + // Add some padding on start of first diff. + Patch &firstPatch = patches.front(); + std::vector &firstPatchDiffs = firstPatch.diffs; + if (firstPatchDiffs.empty() || firstPatchDiffs.front().operation != EQUAL) { + // Add nullPadding equality. + firstPatchDiffs.insert(firstPatchDiffs.begin(), Diff(EQUAL, nullPadding)); + firstPatch.start1 -= paddingLength; // Should be 0. + firstPatch.start2 -= paddingLength; // Should be 0. 
+ firstPatch.length1 += paddingLength; + firstPatch.length2 += paddingLength; + } else if (paddingLength > firstPatchDiffs.front().text.length()) { + // Grow first equality. + Diff &firstDiff = firstPatchDiffs.front(); + int extraLength = paddingLength - firstDiff.text.length(); + firstDiff.text = safeMid(nullPadding, firstDiff.text.length(), + paddingLength - firstDiff.text.length()) + firstDiff.text; + firstPatch.start1 -= extraLength; + firstPatch.start2 -= extraLength; + firstPatch.length1 += extraLength; + firstPatch.length2 += extraLength; + } + + // Add some padding on end of last diff. + Patch &lastPatch = patches.front(); + std::vector &lastPatchDiffs = lastPatch.diffs; + if (lastPatchDiffs.empty() || lastPatchDiffs.back().operation != EQUAL) { + // Add nullPadding equality. + lastPatchDiffs.push_back(Diff(EQUAL, nullPadding)); + lastPatch.length1 += paddingLength; + lastPatch.length2 += paddingLength; + } else if (paddingLength > lastPatchDiffs.back().text.length()) { + // Grow last equality. + Diff &lastDiff = lastPatchDiffs.back(); + int extraLength = paddingLength - lastDiff.text.length(); + lastDiff.text += nullPadding.substr(0, extraLength); + lastPatch.length1 += extraLength; + lastPatch.length2 += extraLength; + } + + return nullPadding; } -void diff_match_patch::patch_splitMax(QList &patches) { - short patch_size = Match_MaxBits; - QString precontext, postcontext; - Patch patch; - int start1, start2; - bool empty; - Operation diff_type; - QString diff_text; - QMutableListIterator pointer(patches); - Patch bigpatch; - - if (pointer.hasNext()) { - bigpatch = pointer.next(); - } - - while (!bigpatch.isNull()) { - if (bigpatch.length1 <= patch_size) { - bigpatch = pointer.hasNext() ? pointer.next() : Patch(); - continue; - } - // Remove the big old patch. - pointer.remove(); - start1 = bigpatch.start1; - start2 = bigpatch.start2; - precontext = ""; - while (!bigpatch.diffs.isEmpty()) { - // Create one of several smaller patches. 
- patch = Patch(); - empty = true; - patch.start1 = start1 - precontext.length(); - patch.start2 = start2 - precontext.length(); - if (!precontext.isEmpty()) { - patch.length1 = patch.length2 = precontext.length(); - patch.diffs.append(Diff(EQUAL, precontext)); - } - while (!bigpatch.diffs.isEmpty() - && patch.length1 < patch_size - Patch_Margin) { - diff_type = bigpatch.diffs.front().operation; - diff_text = bigpatch.diffs.front().text; - if (diff_type == INSERT) { - // Insertions are harmless. - patch.length2 += diff_text.length(); - start2 += diff_text.length(); - patch.diffs.append(bigpatch.diffs.front()); - bigpatch.diffs.removeFirst(); - empty = false; - } else if (diff_type == DELETE && patch.diffs.size() == 1 - && patch.diffs.front().operation == EQUAL - && diff_text.length() > 2 * patch_size) { - // This is a large deletion. Let it pass in one chunk. - patch.length1 += diff_text.length(); - start1 += diff_text.length(); - empty = false; - patch.diffs.append(Diff(diff_type, diff_text)); - bigpatch.diffs.removeFirst(); - } else { - // Deletion or equality. Only take as much as we can stomach. 
- diff_text = diff_text.left(std::min(diff_text.length(), - patch_size - patch.length1 - Patch_Margin)); - patch.length1 += diff_text.length(); - start1 += diff_text.length(); - if (diff_type == EQUAL) { - patch.length2 += diff_text.length(); - start2 += diff_text.length(); - } else { - empty = false; - } - patch.diffs.append(Diff(diff_type, diff_text)); - if (diff_text == bigpatch.diffs.front().text) { - bigpatch.diffs.removeFirst(); - } else { - bigpatch.diffs.front().text = safeMid(bigpatch.diffs.front().text, - diff_text.length()); - } +void diff_match_patch::patch_splitMax(std::vector &patches) { + short patch_size = Match_MaxBits; + std::wstring precontext, postcontext; + Patch patch; + int start1, start2; + bool empty; + Operation diff_type; + std::wstring diff_text; + auto pointer = patches.begin(); + Patch bigpatch; + + if (pointer != patches.end()) { + bigpatch = *pointer; + } + + while (!bigpatch.isNull()) { + if (bigpatch.length1 <= patch_size) { + if (pointer != patches.end()) + bigpatch = *pointer++; + else + bigpatch = Patch(); + continue; } - } - // Compute the head context for the next patch. - precontext = diff_text2(patch.diffs); - precontext = safeMid(precontext, precontext.length() - Patch_Margin); - // Append the end context for this patch. - if (diff_text1(bigpatch.diffs).length() > Patch_Margin) { - postcontext = diff_text1(bigpatch.diffs).left(Patch_Margin); - } else { - postcontext = diff_text1(bigpatch.diffs); - } - if (!postcontext.isEmpty()) { - patch.length1 += postcontext.length(); - patch.length2 += postcontext.length(); - if (!patch.diffs.isEmpty() - && patch.diffs.back().operation == EQUAL) { - patch.diffs.back().text += postcontext; - } else { - patch.diffs.append(Diff(EQUAL, postcontext)); + // Remove the big old patch. 
+ auto tmp_pointer = pointer + 1; + patches.erase(pointer); + pointer = tmp_pointer; + start1 = bigpatch.start1; + start2 = bigpatch.start2; + precontext = L""; + while (!bigpatch.diffs.empty()) { + // Create one of several smaller patches. + patch = Patch(); + empty = true; + patch.start1 = start1 - precontext.length(); + patch.start2 = start2 - precontext.length(); + if (!precontext.empty()) { + patch.length1 = patch.length2 = precontext.length(); + patch.diffs.push_back(Diff(EQUAL, precontext)); + } + while (!bigpatch.diffs.empty() + && patch.length1 < patch_size - Patch_Margin) { + diff_type = bigpatch.diffs.front().operation; + diff_text = bigpatch.diffs.front().text; + if (diff_type == INSERT) { + // Insertions are harmless. + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + patch.diffs.push_back(bigpatch.diffs.front()); + bigpatch.diffs.erase(bigpatch.diffs.begin()); + empty = false; + } else if (diff_type == DELETE && patch.diffs.size() == 1 + && patch.diffs.front().operation == EQUAL + && diff_text.length() > 2 * patch_size) { + // This is a large deletion. Let it pass in one chunk. + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + empty = false; + patch.diffs.push_back(Diff(diff_type, diff_text)); + bigpatch.diffs.erase(bigpatch.diffs.begin()); + } else { + // Deletion or equality. Only take as much as we can stomach. 
+ diff_text = diff_text.substr(0, std::min(diff_text.length(), + (std::wstring::size_type)(patch_size - patch.length1 - Patch_Margin))); + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + if (diff_type == EQUAL) { + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + } else { + empty = false; + } + patch.diffs.push_back(Diff(diff_type, diff_text)); + if (diff_text == bigpatch.diffs.front().text) { + bigpatch.diffs.erase(bigpatch.diffs.begin()); + } else { + bigpatch.diffs.front().text = safeMid(bigpatch.diffs.front().text, + diff_text.length()); + } + } + } + // Compute the head context for the next patch. + precontext = diff_text2(patch.diffs); + precontext = safeMid(precontext, precontext.length() - Patch_Margin); + // Append the end context for this patch. + if (diff_text1(bigpatch.diffs).length() > Patch_Margin) { + postcontext = diff_text1(bigpatch.diffs).substr(0, Patch_Margin); + } else { + postcontext = diff_text1(bigpatch.diffs); + } + if (!postcontext.empty()) { + patch.length1 += postcontext.length(); + patch.length2 += postcontext.length(); + if (!patch.diffs.empty() + && patch.diffs.back().operation == EQUAL) { + patch.diffs.back().text += postcontext; + } else { + patch.diffs.push_back(Diff(EQUAL, postcontext)); + } + } + if (!empty) { + patches.insert(pointer, patch); + } } - } - if (!empty) { - pointer.insert(patch); - } + if (pointer != patches.end()) + bigpatch = *pointer++; + else + bigpatch = Patch(); } - bigpatch = pointer.hasNext() ? 
pointer.next() : Patch(); - } } -QString diff_match_patch::patch_toText(const QList &patches) { - QString text; - foreach(Patch aPatch, patches) { - text.append(aPatch.toString()); - } - return text; +std::wstring diff_match_patch::patch_toText(const std::vector &patches) { + std::wstring text; + for(Patch aPatch: patches) { + text.append(aPatch.toString()); + } + return text; } -QList diff_match_patch::patch_fromText(const QString &textline) { - QList patches; - if (textline.isEmpty()) { - return patches; - } - QStringList text = textline.split("\n", QString::SkipEmptyParts); - Patch patch; - QRegExp patchHeader("^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$"); - char sign; - QString line; - while (!text.isEmpty()) { - if (!patchHeader.exactMatch(text.front())) { - throw QString("Invalid patch string: %1").arg(text.front()); - } - - patch = Patch(); - patch.start1 = patchHeader.cap(1).toInt(); - if (patchHeader.cap(2).isEmpty()) { - patch.start1--; - patch.length1 = 1; - } else if (patchHeader.cap(2) == "0") { - patch.length1 = 0; - } else { - patch.start1--; - patch.length1 = patchHeader.cap(2).toInt(); +std::vector diff_match_patch::patch_fromText(const std::wstring &textline) { + std::vector patches; + if (textline.empty()) { + return patches; } + std::wstring_list text = std::split(textline, '\n', true); + Patch patch; + std::wregex patchHeader(L"^@@ -(\\d+),?(\\d*) \\+(\\d+),?(\\d*) @@$"); + wchar_t sign; + std::wstring line; + while (!text.empty()) { + std::wsmatch cm; + if (!std::regex_match(text.front(), cm, patchHeader)) { + throw std::format(L"Invalid patch string: %ls", text.front().c_str()); + } - patch.start2 = patchHeader.cap(3).toInt(); - if (patchHeader.cap(4).isEmpty()) { - patch.start2--; - patch.length2 = 1; - } else if (patchHeader.cap(4) == "0") { - patch.length2 = 0; - } else { - patch.start2--; - patch.length2 = patchHeader.cap(4).toInt(); - } - text.removeFirst(); - - while (!text.isEmpty()) { - if (text.front().isEmpty()) { - 
text.removeFirst(); - continue; - } - sign = text.front()[0].toAscii(); - line = safeMid(text.front(), 1); - line = line.replace("+", "%2B"); // decode would change all "+" to " " - line = QUrl::fromPercentEncoding(qPrintable(line)); - if (sign == '-') { - // Deletion. - patch.diffs.append(Diff(DELETE, line)); - } else if (sign == '+') { - // Insertion. - patch.diffs.append(Diff(INSERT, line)); - } else if (sign == ' ') { - // Minor equality. - patch.diffs.append(Diff(EQUAL, line)); - } else if (sign == '@') { - // Start of next patch. - break; - } else { - // WTF? - throw QString("Invalid patch mode '%1' in: %2").arg(sign).arg(line); - return QList(); - } - text.removeFirst(); - } - - patches.append(patch); - - } - return patches; + patch = Patch(); + patch.start1 = std::stoi(cm[1].str()); + if (cm[2].length() == 0) { + patch.start1--; + patch.length1 = 1; + } else if (cm[2].str() == L"0") { + patch.length1 = 0; + } else { + patch.start1--; + patch.length1 = std::stoi(cm[2].str()); + } + + patch.start2 = std::stoi(cm[3].str()); + if (cm[4].length() == 0) { + patch.start2--; + patch.length2 = 1; + } else if (cm[4].str() == L"0") { + patch.length2 = 0; + } else { + patch.start2--; + patch.length2 = std::stoi(cm[4].str()); + } + text.erase(text.begin()); + + while (!text.empty()) { + if (text.front().empty()) { + text.erase(text.begin()); + continue; + } + sign = text.front()[0]; + line = safeMid(text.front(), 1); + std::replace_all(line, L"+", L"%2B"); // decode would change all "+" to " " + line = std::url_decode(line); + if (sign == '-') { + // Deletion. + patch.diffs.push_back(Diff(DELETE, line)); + } else if (sign == '+') { + // Insertion. + patch.diffs.push_back(Diff(INSERT, line)); + } else if (sign == ' ') { + // Minor equality. + patch.diffs.push_back(Diff(EQUAL, line)); + } else if (sign == '@') { + // Start of next patch. + break; + } else { + // WTF? 
+ throw std::format(L"Invalid patch mode '%c' in: %ls", sign, line.c_str()); + return std::vector(); + } + text.erase(text.begin()); + } + + patches.push_back(patch); + + } + return patches; } diff --git a/cpp/diff_match_patch.h b/cpp/diff_match_patch.h index 82d3283..e7f269f 100644 --- a/cpp/diff_match_patch.h +++ b/cpp/diff_match_patch.h @@ -26,91 +26,86 @@ * * @author fraser@google.com (Neil Fraser) * - * Qt/C++ port by mikeslemmer@gmail.com (Mike Slemmer): - * - * Code known to compile and run with Qt 4.3 through Qt 4.7. - * - * Here is a trivial sample program which works properly when linked with this - * library: - * - - #include - #include - #include - #include - #include + #include + #include + #include + #include + #include #include "diff_match_patch.h" int main(int argc, char **argv) { - diff_match_patch dmp; - QString str1 = QString("First string in diff"); - QString str2 = QString("Second string in diff"); + diff_match_patch dmp; + std::wstring str1 = std::wstring("First string in diff"); + std::wstring str2 = std::wstring("Second string in diff"); - QString strPatch = dmp.patch_toText(dmp.patch_make(str1, str2)); - QPair > out - = dmp.patch_apply(dmp.patch_fromText(strPatch), str1); - QString strResult = out.first; + std::wstring strPatch = dmp.patch_toText(dmp.patch_make(str1, str2)); + std::pair > out + = dmp.patch_apply(dmp.patch_fromText(strPatch), str1); + std::wstring strResult = out.first; - // here, strResult will equal str2 above. - return 0; + // here, strResult will equal str2 above. + return 0; } - */ +*/ +#include "diff_match_patch_util.h" +#include /**- -* The data structure representing a diff is a Linked list of Diff objects: -* {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"), -* Diff(Operation.EQUAL, " world.")} -* which means: delete "Hello", add "Goodbye" and keep " world." 
/**
 * Kind of edit carried by a Diff.
 * A diff result is a sequence of Diff objects, e.g.
 *   {Diff(DELETE, L"Hello"), Diff(INSERT, L"Goodbye"), Diff(EQUAL, L" world.")}
 * which means: delete "Hello", add "Goodbye" and keep " world."
 */
enum Operation {
    DELETE,
    INSERT,
    EQUAL
};


/**
 * One diff operation: an Operation plus the text it applies to.
 */
class Diff {
public:
    // One of: INSERT, DELETE or EQUAL.
    Operation operation;
    // The text associated with this diff operation.
    std::wstring text;
    // Set by the constructors.
    // NOTE(review): presumably what isNull() reports -- confirm against the
    // definition in the .cpp file.
    bool invalid;

    /**
     * Constructor.  Initializes the diff with the provided values.
     * @param operation One of INSERT, DELETE or EQUAL.
     * @param text The text being applied.
     */
    Diff(Operation _operation, const std::wstring &_text);
    Diff(Operation _operation, const wchar_t * text);
    Diff();

    inline bool isNull() const;
    std::wstring toString() const;
    bool operator==(const Diff &d) const;
    bool operator!=(const Diff &d) const;

    // Name of an Operation as text.
    static std::wstring strOperation(Operation op);
};


/**
 * One patch operation: a run of diffs plus its start offset and length in
 * the source (start1/length1) and destination (start2/length2) texts.
 */
class Patch {
public:
    std::vector<Diff> diffs;
    int start1;
    int start2;
    int length1;
    int length2;

    /**
     * Constructor.  Initializes with an empty list of diffs.
     */
    Patch();

    bool isNull() const;
    // Not const-qualified, so it cannot be called on a const Patch.
    std::wstring toString();
};
- */ - QList diff_main(const QString &text1, const QString &text2); - - /** - * Find the differences between two texts. - * @param text1 Old string to be diffed. - * @param text2 New string to be diffed. - * @param checklines Speedup flag. If false, then don't run a - * line-level diff first to identify the changed areas. - * If true, then run a faster slightly less optimal diff. - * @return Linked List of Diff objects. - */ - QList diff_main(const QString &text1, const QString &text2, bool checklines); - - /** - * Find the differences between two texts. Simplifies the problem by - * stripping any common prefix or suffix off the texts before diffing. - * @param text1 Old string to be diffed. - * @param text2 New string to be diffed. - * @param checklines Speedup flag. If false, then don't run a - * line-level diff first to identify the changed areas. - * If true, then run a faster slightly less optimal diff. - * @param deadline Time when the diff should be complete by. Used - * internally for recursive calls. Users should set DiffTimeout instead. - * @return Linked List of Diff objects. - */ - private: - QList diff_main(const QString &text1, const QString &text2, bool checklines, clock_t deadline); - - /** - * Find the differences between two texts. Assumes that the texts do not - * have any common prefix or suffix. - * @param text1 Old string to be diffed. - * @param text2 New string to be diffed. - * @param checklines Speedup flag. If false, then don't run a - * line-level diff first to identify the changed areas. - * If true, then run a faster slightly less optimal diff. - * @param deadline Time when the diff should be complete by. - * @return Linked List of Diff objects. - */ - private: - QList diff_compute(QString text1, QString text2, bool checklines, clock_t deadline); - - /** - * Do a quick line-level diff on both strings, then rediff the parts for - * greater accuracy. - * This speedup can produce non-minimal diffs. 
- * @param text1 Old string to be diffed. - * @param text2 New string to be diffed. - * @param deadline Time when the diff should be complete by. - * @return Linked List of Diff objects. - */ - private: - QList diff_lineMode(QString text1, QString text2, clock_t deadline); - - /** - * Find the 'middle snake' of a diff, split the problem in two - * and return the recursively constructed diff. - * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. - * @param text1 Old string to be diffed. - * @param text2 New string to be diffed. - * @return Linked List of Diff objects. - */ - protected: - QList diff_bisect(const QString &text1, const QString &text2, clock_t deadline); - - /** - * Given the location of the 'middle snake', split the diff in two parts - * and recurse. - * @param text1 Old string to be diffed. - * @param text2 New string to be diffed. - * @param x Index of split point in text1. - * @param y Index of split point in text2. - * @param deadline Time at which to bail if not yet complete. - * @return LinkedList of Diff objects. - */ - private: - QList diff_bisectSplit(const QString &text1, const QString &text2, int x, int y, clock_t deadline); - - /** - * Split two texts into a list of strings. Reduce the texts to a string of - * hashes where each Unicode character represents one line. - * @param text1 First string. - * @param text2 Second string. - * @return Three element Object array, containing the encoded text1, the - * encoded text2 and the List of unique strings. The zeroth element - * of the List of unique strings is intentionally blank. - */ - protected: - QList diff_linesToChars(const QString &text1, const QString &text2); // return elems 0 and 1 are QString, elem 2 is QStringList - - /** - * Split a text into a list of strings. Reduce the texts to a string of - * hashes where each Unicode character represents one line. - * @param text String to encode. - * @param lineArray List of unique strings. 
- * @param lineHash Map of strings to indices. - * @return Encoded string. - */ - private: - QString diff_linesToCharsMunge(const QString &text, QStringList &lineArray, - QMap &lineHash); - - /** - * Rehydrate the text in a diff from a string of line hashes to real lines of - * text. - * @param diffs LinkedList of Diff objects. - * @param lineArray List of unique strings. - */ - private: - void diff_charsToLines(QList &diffs, const QStringList &lineArray); - - /** - * Determine the common prefix of two strings. - * @param text1 First string. - * @param text2 Second string. - * @return The number of characters common to the start of each string. - */ - public: - int diff_commonPrefix(const QString &text1, const QString &text2); - - /** - * Determine the common suffix of two strings. - * @param text1 First string. - * @param text2 Second string. - * @return The number of characters common to the end of each string. - */ - public: - int diff_commonSuffix(const QString &text1, const QString &text2); - - /** - * Determine if the suffix of one string is the prefix of another. - * @param text1 First string. - * @param text2 Second string. - * @return The number of characters common to the end of the first - * string and the start of the second string. - */ - protected: - int diff_commonOverlap(const QString &text1, const QString &text2); - - /** - * Do the two texts share a substring which is at least half the length of - * the longer text? - * This speedup can produce non-minimal diffs. - * @param text1 First string. - * @param text2 Second string. - * @return Five element String array, containing the prefix of text1, the - * suffix of text1, the prefix of text2, the suffix of text2 and the - * common middle. Or null if there was no match. - */ - protected: - QStringList diff_halfMatch(const QString &text1, const QString &text2); - - /** - * Does a substring of shorttext exist within longtext such that the - * substring is at least half the length of longtext? 
- * @param longtext Longer string. - * @param shorttext Shorter string. - * @param i Start index of quarter length substring within longtext. - * @return Five element String array, containing the prefix of longtext, the - * suffix of longtext, the prefix of shorttext, the suffix of shorttext - * and the common middle. Or null if there was no match. - */ - private: - QStringList diff_halfMatchI(const QString &longtext, const QString &shorttext, int i); - - /** - * Reduce the number of edits by eliminating semantically trivial equalities. - * @param diffs LinkedList of Diff objects. - */ - public: - void diff_cleanupSemantic(QList &diffs); - - /** - * Look for single edits surrounded on both sides by equalities - * which can be shifted sideways to align the edit to a word boundary. - * e.g: The cat came. -> The cat came. - * @param diffs LinkedList of Diff objects. - */ - public: - void diff_cleanupSemanticLossless(QList &diffs); - - /** - * Given two strings, compute a score representing whether the internal - * boundary falls on logical boundaries. - * Scores range from 6 (best) to 0 (worst). - * @param one First string. - * @param two Second string. - * @return The score. - */ - private: - int diff_cleanupSemanticScore(const QString &one, const QString &two); - - /** - * Reduce the number of edits by eliminating operationally trivial equalities. - * @param diffs LinkedList of Diff objects. - */ - public: - void diff_cleanupEfficiency(QList &diffs); - - /** - * Reorder and merge like edit sections. Merge equalities. - * Any edit section can move as long as it doesn't cross an equality. - * @param diffs LinkedList of Diff objects. - */ - public: - void diff_cleanupMerge(QList &diffs); - - /** - * loc is a location in text1, compute and return the equivalent location in - * text2. - * e.g. "The cat" vs "The big cat", 1->1, 5->8 - * @param diffs LinkedList of Diff objects. - * @param loc Location within text1. - * @return Location within text2. 
- */ - public: - int diff_xIndex(const QList &diffs, int loc); - - /** - * Convert a Diff list into a pretty HTML report. - * @param diffs LinkedList of Diff objects. - * @return HTML representation. - */ - public: - QString diff_prettyHtml(const QList &diffs); - - /** - * Compute and return the source text (all equalities and deletions). - * @param diffs LinkedList of Diff objects. - * @return Source text. - */ - public: - QString diff_text1(const QList &diffs); - - /** - * Compute and return the destination text (all equalities and insertions). - * @param diffs LinkedList of Diff objects. - * @return Destination text. - */ - public: - QString diff_text2(const QList &diffs); - - /** - * Compute the Levenshtein distance; the number of inserted, deleted or - * substituted characters. - * @param diffs LinkedList of Diff objects. - * @return Number of changes. - */ - public: - int diff_levenshtein(const QList &diffs); - - /** - * Crush the diff into an encoded string which describes the operations - * required to transform text1 into text2. - * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. - * Operations are tab-separated. Inserted text is escaped using %xx notation. - * @param diffs Array of diff tuples. - * @return Delta text. - */ - public: - QString diff_toDelta(const QList &diffs); - - /** - * Given the original text1, and an encoded string which describes the - * operations required to transform text1 into text2, compute the full diff. - * @param text1 Source string for the diff. - * @param delta Delta text. - * @return Array of diff tuples or null if invalid. - * @throws QString If invalid input. - */ - public: - QList diff_fromDelta(const QString &text1, const QString &delta); - - - // MATCH FUNCTIONS - - - /** - * Locate the best instance of 'pattern' in 'text' near 'loc'. - * Returns -1 if no match found. - * @param text The text to search. - * @param pattern The pattern to search for. - * @param loc The location to search around. 
- * @return Best match index or -1. - */ - public: - int match_main(const QString &text, const QString &pattern, int loc); - - /** - * Locate the best instance of 'pattern' in 'text' near 'loc' using the - * Bitap algorithm. Returns -1 if no match found. - * @param text The text to search. - * @param pattern The pattern to search for. - * @param loc The location to search around. - * @return Best match index or -1. - */ - protected: - int match_bitap(const QString &text, const QString &pattern, int loc); - - /** - * Compute and return the score for a match with e errors and x location. - * @param e Number of errors in match. - * @param x Location of match. - * @param loc Expected location of match. - * @param pattern Pattern being sought. - * @return Overall score for match (0.0 = good, 1.0 = bad). - */ - private: - double match_bitapScore(int e, int x, int loc, const QString &pattern); - - /** - * Initialise the alphabet for the Bitap algorithm. - * @param pattern The text to encode. - * @return Hash of character locations. - */ - protected: - QMap match_alphabet(const QString &pattern); - - - // PATCH FUNCTIONS - - - /** - * Increase the context until it is unique, - * but don't let the pattern expand beyond Match_MaxBits. - * @param patch The patch to grow. - * @param text Source text. - */ - protected: - void patch_addContext(Patch &patch, const QString &text); - - /** - * Compute a list of patches to turn text1 into text2. - * A set of diffs will be computed. - * @param text1 Old text. - * @param text2 New text. - * @return LinkedList of Patch objects. - */ - public: - QList patch_make(const QString &text1, const QString &text2); - - /** - * Compute a list of patches to turn text1 into text2. - * text1 will be derived from the provided diffs. - * @param diffs Array of diff tuples for text1 to text2. - * @return LinkedList of Patch objects. - */ - public: - QList patch_make(const QList &diffs); - - /** - * Compute a list of patches to turn text1 into text2. 
- * text2 is ignored, diffs are the delta between text1 and text2. - * @param text1 Old text. - * @param text2 Ignored. - * @param diffs Array of diff tuples for text1 to text2. - * @return LinkedList of Patch objects. - * @deprecated Prefer patch_make(const QString &text1, const QList &diffs). - */ - public: - QList patch_make(const QString &text1, const QString &text2, const QList &diffs); - - /** - * Compute a list of patches to turn text1 into text2. - * text2 is not provided, diffs are the delta between text1 and text2. - * @param text1 Old text. - * @param diffs Array of diff tuples for text1 to text2. - * @return LinkedList of Patch objects. - */ - public: - QList patch_make(const QString &text1, const QList &diffs); - - /** - * Given an array of patches, return another array that is identical. - * @param patches Array of patch objects. - * @return Array of patch objects. - */ - public: - QList patch_deepCopy(QList &patches); - - /** - * Merge a set of patches onto the text. Return a patched text, as well - * as an array of true/false values indicating which patches were applied. - * @param patches Array of patch objects. - * @param text Old text. - * @return Two element Object array, containing the new text and an array of - * boolean values. - */ - public: - QPair > patch_apply(QList &patches, const QString &text); - - /** - * Add some padding on text start and end so that edges can match something. - * Intended to be called only from within patch_apply. - * @param patches Array of patch objects. - * @return The padding string added to each side. - */ - public: - QString patch_addPadding(QList &patches); - - /** - * Look through the patches and break up any which are longer than the - * maximum limit of the match algorithm. - * Intended to be called only from within patch_apply. - * @param patches LinkedList of Patch objects. - */ - public: - void patch_splitMax(QList &patches); - - /** - * Take a list of patches and return a textual representation. 
- * @param patches List of Patch objects. - * @return Text representation of patches. - */ - public: - QString patch_toText(const QList &patches); - - /** - * Parse a textual representation of patches and return a List of Patch - * objects. - * @param textline Text representation of patches. - * @return List of Patch objects. - * @throws QString If invalid input. - */ - public: - QList patch_fromText(const QString &textline); - - /** - * A safer version of QString.mid(pos). This one returns "" instead of - * null when the postion equals the string length. - * @param str String to take a substring from. - * @param pos Position to start the substring from. - * @return Substring. - */ - private: - static inline QString safeMid(const QString &str, int pos) { - return (pos == str.length()) ? QString("") : str.mid(pos); - } - - /** - * A safer version of QString.mid(pos, len). This one returns "" instead of - * null when the postion equals the string length. - * @param str String to take a substring from. - * @param pos Position to start the substring from. - * @param len Length of substring. - * @return Substring. - */ - private: - static inline QString safeMid(const QString &str, int pos, int len) { - return (pos == str.length()) ? QString("") : str.mid(pos, len); - } + friend class diff_match_patch_test; + +public: + // Defaults. + // Set these on your diff_match_patch instance to override the defaults. + + // Number of seconds to map a diff before giving up (0 for infinity). + float Diff_Timeout; + // Cost of an empty edit operation in terms of edit characters. + short Diff_EditCost; + // At what point is no match declared (0.0 = perfection, 1.0 = very loose). + float Match_Threshold; + // How far to search for a match (0 = exact location, 1000+ = broad match). + // A match this many characters away from the expected location will add + // 1.0 to the score (0.0 is a perfect match). 
+ int Match_Distance; + // When deleting a large block of text (over ~64 characters), how close does + // the contents have to match the expected contents. (0.0 = perfection, + // 1.0 = very loose). Note that Match_Threshold controls how closely the + // end points of a delete need to match. + float Patch_DeleteThreshold; + // Chunk size for context length. + short Patch_Margin; + + // The number of bits in an int. + short Match_MaxBits; + +private: + // Define some regex patterns for matching boundaries. + static std::wregex BLANKLINEEND; + static std::wregex BLANKLINESTART; + + +public: + + diff_match_patch(); + + // DIFF FUNCTIONS + + + /** + * Find the differences between two texts. + * Run a faster slightly less optimal diff. + * This method allows the 'checklines' of diff_main() to be optional. + * Most of the time checklines is wanted, so default to true. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @return Linked List of Diff objects. + */ + std::vector diff_main(const std::wstring &text1, const std::wstring &text2); + + /** + * Find the differences between two texts. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster slightly less optimal diff. + * @return Linked List of Diff objects. + */ + std::vector diff_main(const std::wstring &text1, const std::wstring &text2, bool checklines); + + /** + * Find the differences between two texts. Simplifies the problem by + * stripping any common prefix or suffix off the texts before diffing. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster slightly less optimal diff. 
+ * @param deadline Time when the diff should be complete by. Used + * internally for recursive calls. Users should set DiffTimeout instead. + * @return Linked List of Diff objects. + */ +private: + std::vector diff_main(const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline); + + /** + * Find the differences between two texts. Assumes that the texts do not + * have any common prefix or suffix. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster slightly less optimal diff. + * @param deadline Time when the diff should be complete by. + * @return Linked List of Diff objects. + */ +private: + std::vector diff_compute(std::wstring text1, std::wstring text2, bool checklines, clock_t deadline); + + /** + * Do a quick line-level diff on both strings, then rediff the parts for + * greater accuracy. + * This speedup can produce non-minimal diffs. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param deadline Time when the diff should be complete by. + * @return Linked List of Diff objects. + */ +private: + std::vector diff_lineMode(std::wstring text1, std::wstring text2, clock_t deadline); + + /** + * Find the 'middle snake' of a diff, split the problem in two + * and return the recursively constructed diff. + * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @return Linked List of Diff objects. + */ +protected: + std::vector diff_bisect(const std::wstring &text1, const std::wstring &text2, clock_t deadline); + + /** + * Given the location of the 'middle snake', split the diff in two parts + * and recurse. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. 
+ * @param x Index of split point in text1. + * @param y Index of split point in text2. + * @param deadline Time at which to bail if not yet complete. + * @return LinkedList of Diff objects. + */ +private: + std::vector diff_bisectSplit(const std::wstring &text1, const std::wstring &text2, int x, int y, clock_t deadline); + + /** + * Split two texts into a list of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * @param text1 First string. + * @param text2 Second string. + * @return Three element Object array, containing the encoded text1, the + * encoded text2 and the List of unique strings. The zeroth element + * of the List of unique strings is intentionally blank. + */ +protected: + std::vector diff_linesToChars(const std::wstring &text1, const std::wstring &text2); // return elems 0 and 1 are std::wstring, elem 2 is std::wstring_list + + /** + * Split a text into a list of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * @param text String to encode. + * @param lineArray List of unique strings. + * @param lineHash Map of strings to indices. + * @return Encoded string. + */ +private: + std::wstring diff_linesToCharsMunge(const std::wstring &text, std::wstring_list &lineArray, + std::unordered_map &lineHash); + + /** + * Rehydrate the text in a diff from a string of line hashes to real lines of + * text. + * @param diffs LinkedList of Diff objects. + * @param lineArray List of unique strings. + */ +private: + void diff_charsToLines(std::vector &diffs, const std::wstring_list &lineArray); + + /** + * Determine the common prefix of two strings. + * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the start of each string. + */ +public: + int diff_commonPrefix(const std::wstring &text1, const std::wstring &text2); + + /** + * Determine the common suffix of two strings. 
+ * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the end of each string. + */ +public: + int diff_commonSuffix(const std::wstring &text1, const std::wstring &text2); + + /** + * Determine if the suffix of one string is the prefix of another. + * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the end of the first + * string and the start of the second string. + */ +protected: + int diff_commonOverlap(const std::wstring &text1, const std::wstring &text2); + + /** + * Do the two texts share a substring which is at least half the length of + * the longer text? + * This speedup can produce non-minimal diffs. + * @param text1 First string. + * @param text2 Second string. + * @return Five element String array, containing the prefix of text1, the + * suffix of text1, the prefix of text2, the suffix of text2 and the + * common middle. Or null if there was no match. + */ +protected: + std::wstring_list diff_halfMatch(const std::wstring &text1, const std::wstring &text2); + + /** + * Does a substring of shorttext exist within longtext such that the + * substring is at least half the length of longtext? + * @param longtext Longer string. + * @param shorttext Shorter string. + * @param i Start index of quarter length substring within longtext. + * @return Five element String array, containing the prefix of longtext, the + * suffix of longtext, the prefix of shorttext, the suffix of shorttext + * and the common middle. Or null if there was no match. + */ +private: + std::wstring_list diff_halfMatchI(const std::wstring &longtext, const std::wstring &shorttext, int i); + + /** + * Reduce the number of edits by eliminating semantically trivial equalities. + * @param diffs LinkedList of Diff objects. 
+ */ +public: + void diff_cleanupSemantic(std::vector &diffs); + + /** + * Look for single edits surrounded on both sides by equalities + * which can be shifted sideways to align the edit to a word boundary. + * e.g: The cat came. -> The cat came. + * @param diffs LinkedList of Diff objects. + */ +public: + void diff_cleanupSemanticLossless(std::vector &diffs); + + /** + * Given two strings, compute a score representing whether the internal + * boundary falls on logical boundaries. + * Scores range from 6 (best) to 0 (worst). + * @param one First string. + * @param two Second string. + * @return The score. + */ +private: + int diff_cleanupSemanticScore(const std::wstring &one, const std::wstring &two); + + /** + * Reduce the number of edits by eliminating operationally trivial equalities. + * @param diffs LinkedList of Diff objects. + */ +public: + void diff_cleanupEfficiency(std::vector &diffs); + + /** + * Reorder and merge like edit sections. Merge equalities. + * Any edit section can move as long as it doesn't cross an equality. + * @param diffs LinkedList of Diff objects. + */ +public: + void diff_cleanupMerge(std::vector &diffs); + + /** + * loc is a location in text1, compute and return the equivalent location in + * text2. + * e.g. "The cat" vs "The big cat", 1->1, 5->8 + * @param diffs LinkedList of Diff objects. + * @param loc Location within text1. + * @return Location within text2. + */ +public: + int diff_xIndex(const std::vector &diffs, int loc); + + /** + * Convert a Diff list into a pretty HTML report. + * @param diffs LinkedList of Diff objects. + * @return HTML representation. + */ +public: + std::wstring diff_prettyHtml(const std::vector &diffs); + + /** + * Compute and return the source text (all equalities and deletions). + * @param diffs LinkedList of Diff objects. + * @return Source text. + */ +public: + std::wstring diff_text1(const std::vector &diffs); + + /** + * Compute and return the destination text (all equalities and insertions). 
+ * @param diffs LinkedList of Diff objects. + * @return Destination text. + */ +public: + std::wstring diff_text2(const std::vector &diffs); + + /** + * Compute the Levenshtein distance; the number of inserted, deleted or + * substituted characters. + * @param diffs LinkedList of Diff objects. + * @return Number of changes. + */ +public: + int diff_levenshtein(const std::vector &diffs); + + /** + * Crush the diff into an encoded string which describes the operations + * required to transform text1 into text2. + * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. + * Operations are tab-separated. Inserted text is escaped using %xx notation. + * @param diffs Array of diff tuples. + * @return Delta text. + */ +public: + std::wstring diff_toDelta(const std::vector &diffs); + + /** + * Given the original text1, and an encoded string which describes the + * operations required to transform text1 into text2, compute the full diff. + * @param text1 Source string for the diff. + * @param delta Delta text. + * @return Array of diff tuples or null if invalid. + * @throws std::wstring If invalid input. + */ +public: + std::vector diff_fromDelta(const std::wstring &text1, const std::wstring &delta); + + + // MATCH FUNCTIONS + + + /** + * Locate the best instance of 'pattern' in 'text' near 'loc'. + * Returns -1 if no match found. + * @param text The text to search. + * @param pattern The pattern to search for. + * @param loc The location to search around. + * @return Best match index or -1. + */ +public: + int match_main(const std::wstring &text, const std::wstring &pattern, int loc); + + /** + * Locate the best instance of 'pattern' in 'text' near 'loc' using the + * Bitap algorithm. Returns -1 if no match found. + * @param text The text to search. + * @param pattern The pattern to search for. + * @param loc The location to search around. + * @return Best match index or -1. 
+ */ +protected: + int match_bitap(const std::wstring &text, const std::wstring &pattern, int loc); + + /** + * Compute and return the score for a match with e errors and x location. + * @param e Number of errors in match. + * @param x Location of match. + * @param loc Expected location of match. + * @param pattern Pattern being sought. + * @return Overall score for match (0.0 = good, 1.0 = bad). + */ +private: + double match_bitapScore(int e, int x, int loc, const std::wstring &pattern); + + /** + * Initialise the alphabet for the Bitap algorithm. + * @param pattern The text to encode. + * @return Hash of character locations. + */ +protected: + std::unordered_map match_alphabet(const std::wstring &pattern); + + + // PATCH FUNCTIONS + + + /** + * Increase the context until it is unique, + * but don't let the pattern expand beyond Match_MaxBits. + * @param patch The patch to grow. + * @param text Source text. + */ +protected: + void patch_addContext(Patch &patch, const std::wstring &text); + + /** + * Compute a list of patches to turn text1 into text2. + * A set of diffs will be computed. + * @param text1 Old text. + * @param text2 New text. + * @return LinkedList of Patch objects. + */ +public: + std::vector patch_make(const std::wstring &text1, const std::wstring &text2); + + /** + * Compute a list of patches to turn text1 into text2. + * text1 will be derived from the provided diffs. + * @param diffs Array of diff tuples for text1 to text2. + * @return LinkedList of Patch objects. + */ +public: + std::vector patch_make(const std::vector &diffs); + + /** + * Compute a list of patches to turn text1 into text2. + * text2 is ignored, diffs are the delta between text1 and text2. + * @param text1 Old text. + * @param text2 Ignored. + * @param diffs Array of diff tuples for text1 to text2. + * @return LinkedList of Patch objects. + * @deprecated Prefer patch_make(const std::wstring &text1, const std::vector &diffs). 
+ */ +public: + std::vector patch_make(const std::wstring &text1, const std::wstring &text2, const std::vector &diffs); + + /** + * Compute a list of patches to turn text1 into text2. + * text2 is not provided, diffs are the delta between text1 and text2. + * @param text1 Old text. + * @param diffs Array of diff tuples for text1 to text2. + * @return LinkedList of Patch objects. + */ +public: + std::vector patch_make(const std::wstring &text1, const std::vector &diffs); + + /** + * Given an array of patches, return another array that is identical. + * @param patches Array of patch objects. + * @return Array of patch objects. + */ +public: + std::vector patch_deepCopy(std::vector &patches); + + /** + * Merge a set of patches onto the text. Return a patched text, as well + * as an array of true/false values indicating which patches were applied. + * @param patches Array of patch objects. + * @param text Old text. + * @return Two element Object array, containing the new text and an array of + * boolean values. + */ +public: + std::pair > patch_apply(std::vector &patches, const std::wstring &text); + + /** + * Add some padding on text start and end so that edges can match something. + * Intended to be called only from within patch_apply. + * @param patches Array of patch objects. + * @return The padding string added to each side. + */ +public: + std::wstring patch_addPadding(std::vector &patches); + + /** + * Look through the patches and break up any which are longer than the + * maximum limit of the match algorithm. + * Intended to be called only from within patch_apply. + * @param patches LinkedList of Patch objects. + */ +public: + void patch_splitMax(std::vector &patches); + + /** + * Take a list of patches and return a textual representation. + * @param patches List of Patch objects. + * @return Text representation of patches. 
+ */ +public: + std::wstring patch_toText(const std::vector &patches); + + /** + * Parse a textual representation of patches and return a List of Patch + * objects. + * @param textline Text representation of patches. + * @return List of Patch objects. + * @throws std::wstring If invalid input. + */ +public: + std::vector patch_fromText(const std::wstring &textline); + + /** + * A safer version of std::wstring.mid(pos). This one returns "" instead of + * null when the postion equals the string length. + * @param str String to take a substring from. + * @param pos Position to start the substring from. + * @return Substring. + */ +private: + static inline std::wstring safeMid(const std::wstring &str, int pos) { + return (pos == str.length()) ? std::wstring(L"") : str.substr(pos); + } + + /** + * A safer version of std::wstring.mid(pos, len). This one returns "" instead of + * null when the postion equals the string length. + * @param str String to take a substring from. + * @param pos Position to start the substring from. + * @param len Length of substring. + * @return Substring. + */ +private: + static inline std::wstring safeMid(const std::wstring &str, int pos, int len) { + return (pos == str.length()) ? std::wstring(L"") : str.substr(pos, len); + } }; #endif // DIFF_MATCH_PATCH_H diff --git a/cpp/diff_match_patch_test.cpp b/cpp/diff_match_patch_test.cpp index f75b1cd..41aae8c 100644 --- a/cpp/diff_match_patch_test.cpp +++ b/cpp/diff_match_patch_test.cpp @@ -17,18 +17,22 @@ */ // Code known to compile and run with Qt 4.3 through Qt 4.7. 
-#include +#include +#include +#include #include "diff_match_patch.h" #include "diff_match_patch_test.h" +#define qPrintable(x) (x.c_str()) + int main(int argc, char **argv) { - diff_match_patch_test dmp_test; - qDebug("Starting diff_match_patch unit tests."); - dmp_test.run_all_tests(); - qDebug("Done."); - return 0; - Q_UNUSED(argc) - Q_UNUSED(argv) + diff_match_patch_test dmp_test; + std::debug_print(L"Starting diff_match_patch unit tests."); + dmp_test.run_all_tests(); + std::debug_print(L"Done."); + return 0; + (void)argc; + (void)argv; } @@ -36,558 +40,555 @@ diff_match_patch_test::diff_match_patch_test() { } void diff_match_patch_test::run_all_tests() { - QTime t; - t.start(); - try { - testDiffCommonPrefix(); - testDiffCommonSuffix(); - testDiffCommonOverlap(); - testDiffHalfmatch(); - testDiffLinesToChars(); - testDiffCharsToLines(); - testDiffCleanupMerge(); - testDiffCleanupSemanticLossless(); - testDiffCleanupSemantic(); - testDiffCleanupEfficiency(); - testDiffPrettyHtml(); - testDiffText(); - testDiffDelta(); - testDiffXIndex(); - testDiffLevenshtein(); - testDiffBisect(); - testDiffMain(); - - testMatchAlphabet(); - testMatchBitap(); - testMatchMain(); - - testPatchObj(); - testPatchFromText(); - testPatchToText(); - testPatchAddContext(); - testPatchMake(); - testPatchSplitMax(); - testPatchAddPadding(); - testPatchApply(); - qDebug("All tests passed."); - } catch (QString strCase) { - qDebug("Test failed: %s", qPrintable(strCase)); - } - qDebug("Total time: %d ms", t.elapsed()); + try { + testDiffCommonPrefix(); + testDiffCommonSuffix(); + testDiffCommonOverlap(); + testDiffHalfmatch(); + testDiffLinesToChars(); + testDiffCharsToLines(); + testDiffCleanupMerge(); + testDiffCleanupSemanticLossless(); + testDiffCleanupSemantic(); + testDiffCleanupEfficiency(); + testDiffPrettyHtml(); + testDiffText(); + testDiffDelta(); + testDiffXIndex(); + testDiffLevenshtein(); + testDiffBisect(); + testDiffMain(); + + testMatchAlphabet(); + testMatchBitap(); + 
testMatchMain(); + + testPatchObj(); + testPatchFromText(); + testPatchToText(); + testPatchAddContext(); + testPatchMake(); + testPatchSplitMax(); + testPatchAddPadding(); + testPatchApply(); + std::debug_print(L"All tests passed."); + } catch (std::wstring strCase) { + std::debug_print(L"Test failed: %ls", qPrintable(strCase)); + } } // DIFF TEST FUNCTIONS void diff_match_patch_test::testDiffCommonPrefix() { - // Detect any common prefix. - assertEquals("diff_commonPrefix: Null case.", 0, dmp.diff_commonPrefix("abc", "xyz")); + // Detect any common prefix. + assertEquals(L"diff_commonPrefix: Null case.", 0, dmp.diff_commonPrefix(L"abc", L"xyz")); - assertEquals("diff_commonPrefix: Non-null case.", 4, dmp.diff_commonPrefix("1234abcdef", "1234xyz")); + assertEquals(L"diff_commonPrefix: Non-null case.", 4, dmp.diff_commonPrefix(L"1234abcdef", L"1234xyz")); - assertEquals("diff_commonPrefix: Whole case.", 4, dmp.diff_commonPrefix("1234", "1234xyz")); + assertEquals(L"diff_commonPrefix: Whole case.", 4, dmp.diff_commonPrefix(L"1234", L"1234xyz")); } void diff_match_patch_test::testDiffCommonSuffix() { - // Detect any common suffix. - assertEquals("diff_commonSuffix: Null case.", 0, dmp.diff_commonSuffix("abc", "xyz")); + // Detect any common suffix. + assertEquals(L"diff_commonSuffix: Null case.", 0, dmp.diff_commonSuffix(L"abc", L"xyz")); - assertEquals("diff_commonSuffix: Non-null case.", 4, dmp.diff_commonSuffix("abcdef1234", "xyz1234")); + assertEquals(L"diff_commonSuffix: Non-null case.", 4, dmp.diff_commonSuffix(L"abcdef1234", L"xyz1234")); - assertEquals("diff_commonSuffix: Whole case.", 4, dmp.diff_commonSuffix("1234", "xyz1234")); + assertEquals(L"diff_commonSuffix: Whole case.", 4, dmp.diff_commonSuffix(L"1234", L"xyz1234")); } void diff_match_patch_test::testDiffCommonOverlap() { - // Detect any suffix/prefix overlap. - assertEquals("diff_commonOverlap: Null case.", 0, dmp.diff_commonOverlap("", "abcd")); + // Detect any suffix/prefix overlap. 
+ assertEquals(L"diff_commonOverlap: Null case.", 0, dmp.diff_commonOverlap(L"", L"abcd")); - assertEquals("diff_commonOverlap: Whole case.", 3, dmp.diff_commonOverlap("abc", "abcd")); + assertEquals(L"diff_commonOverlap: Whole case.", 3, dmp.diff_commonOverlap(L"abc", L"abcd")); - assertEquals("diff_commonOverlap: No overlap.", 0, dmp.diff_commonOverlap("123456", "abcd")); + assertEquals(L"diff_commonOverlap: No overlap.", 0, dmp.diff_commonOverlap(L"123456", L"abcd")); - assertEquals("diff_commonOverlap: Overlap.", 3, dmp.diff_commonOverlap("123456xxx", "xxxabcd")); + assertEquals(L"diff_commonOverlap: Overlap.", 3, dmp.diff_commonOverlap(L"123456xxx", L"xxxabcd")); - // Some overly clever languages (C#) may treat ligatures as equal to their - // component letters. E.g. U+FB01 == 'fi' - assertEquals("diff_commonOverlap: Unicode.", 0, dmp.diff_commonOverlap("fi", QString::fromWCharArray((const wchar_t*) L"\ufb01i", 2))); + // Some overly clever languages (C#) may treat ligatures as equal to their + // component letters. E.g. U+FB01 == 'fi' + assertEquals(L"diff_commonOverlap: Unicode.", 0, dmp.diff_commonOverlap(L"fi", std::wstring(L"\ufb01i", 2))); } void diff_match_patch_test::testDiffHalfmatch() { - // Detect a halfmatch. - dmp.Diff_Timeout = 1; - assertEmpty("diff_halfMatch: No match #1.", dmp.diff_halfMatch("1234567890", "abcdef")); + // Detect a halfmatch. 
+ dmp.Diff_Timeout = 1; + assertEmpty(L"diff_halfMatch: No match #1.", dmp.diff_halfMatch(L"1234567890", L"abcdef")); - assertEmpty("diff_halfMatch: No match #2.", dmp.diff_halfMatch("12345", "23")); + assertEmpty(L"diff_halfMatch: No match #2.", dmp.diff_halfMatch(L"12345", L"23")); - assertEquals("diff_halfMatch: Single Match #1.", QString("12,90,a,z,345678").split(","), dmp.diff_halfMatch("1234567890", "a345678z")); + assertEquals(L"diff_halfMatch: Single Match #1.", std::split(L"12,90,a,z,345678", ','), dmp.diff_halfMatch(L"1234567890", L"a345678z")); - assertEquals("diff_halfMatch: Single Match #2.", QString("a,z,12,90,345678").split(","), dmp.diff_halfMatch("a345678z", "1234567890")); + assertEquals(L"diff_halfMatch: Single Match #2.", std::split(L"a,z,12,90,345678", ','), dmp.diff_halfMatch(L"a345678z", L"1234567890")); - assertEquals("diff_halfMatch: Single Match #3.", QString("abc,z,1234,0,56789").split(","), dmp.diff_halfMatch("abc56789z", "1234567890")); + assertEquals(L"diff_halfMatch: Single Match #3.", std::split(L"abc,z,1234,0,56789", ','), dmp.diff_halfMatch(L"abc56789z", L"1234567890")); - assertEquals("diff_halfMatch: Single Match #4.", QString("a,xyz,1,7890,23456").split(","), dmp.diff_halfMatch("a23456xyz", "1234567890")); + assertEquals(L"diff_halfMatch: Single Match #4.", std::split(L"a,xyz,1,7890,23456", ','), dmp.diff_halfMatch(L"a23456xyz", L"1234567890")); - assertEquals("diff_halfMatch: Multiple Matches #1.", QString("12123,123121,a,z,1234123451234").split(","), dmp.diff_halfMatch("121231234123451234123121", "a1234123451234z")); + assertEquals(L"diff_halfMatch: Multiple Matches #1.", std::split(L"12123,123121,a,z,1234123451234", ','), dmp.diff_halfMatch(L"121231234123451234123121", L"a1234123451234z")); - assertEquals("diff_halfMatch: Multiple Matches #2.", QString(",-=-=-=-=-=,x,,x-=-=-=-=-=-=-=").split(","), dmp.diff_halfMatch("x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-=")); + assertEquals(L"diff_halfMatch: Multiple Matches #2.", 
std::split(L",-=-=-=-=-=,x,,x-=-=-=-=-=-=-=", ','), dmp.diff_halfMatch(L"x-=-=-=-=-=-=-=-=-=-=-=-=", L"xx-=-=-=-=-=-=-=")); - assertEquals("diff_halfMatch: Multiple Matches #3.", QString("-=-=-=-=-=,,,y,-=-=-=-=-=-=-=y").split(","), dmp.diff_halfMatch("-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy")); + assertEquals(L"diff_halfMatch: Multiple Matches #3.", std::split(L"-=-=-=-=-=,,,y,-=-=-=-=-=-=-=y", ','), dmp.diff_halfMatch(L"-=-=-=-=-=-=-=-=-=-=-=-=y", L"-=-=-=-=-=-=-=yy")); - // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy - assertEquals("diff_halfMatch: Non-optimal halfmatch.", QString("qHillo,w,x,Hulloy,HelloHe").split(","), dmp.diff_halfMatch("qHilloHelloHew", "xHelloHeHulloy")); + // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not -qHillo+x=HelloHe-w+Hulloy + assertEquals(L"diff_halfMatch: Non-optimal halfmatch.", std::split(L"qHillo,w,x,Hulloy,HelloHe", ','), dmp.diff_halfMatch(L"qHilloHelloHew", L"xHelloHeHulloy")); - dmp.Diff_Timeout = 0; - assertEmpty("diff_halfMatch: Optimal no halfmatch.", dmp.diff_halfMatch("qHilloHelloHew", "xHelloHeHulloy")); + dmp.Diff_Timeout = 0; + assertEmpty(L"diff_halfMatch: Optimal no halfmatch.", dmp.diff_halfMatch(L"qHilloHelloHew", L"xHelloHeHulloy")); } void diff_match_patch_test::testDiffLinesToChars() { - // Convert lines down to characters. 
- QStringList tmpVector; - QList tmpVarList; - tmpVector.append(""); - tmpVector.append("alpha\n"); - tmpVector.append("beta\n"); - tmpVarList << QVariant::fromValue(QString() + QChar((ushort)1) + QChar((ushort)2) + QChar((ushort)1)); //(("\u0001\u0002\u0001")); - tmpVarList << QVariant::fromValue(QString() + QChar((ushort)2) + QChar((ushort)1) + QChar((ushort)2)); // (("\u0002\u0001\u0002")); - tmpVarList << QVariant::fromValue(tmpVector); - assertEquals("diff_linesToChars:", tmpVarList, dmp.diff_linesToChars("alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n")); - - tmpVector.clear(); - tmpVarList.clear(); - tmpVector.append(""); - tmpVector.append("alpha\r\n"); - tmpVector.append("beta\r\n"); - tmpVector.append("\r\n"); - tmpVarList << QVariant::fromValue(QString("")); - tmpVarList << QVariant::fromValue(QString() + QChar((ushort)1) + QChar((ushort)2) + QChar((ushort)3) + QChar((ushort)3)); // (("\u0001\u0002\u0003\u0003")); - tmpVarList << QVariant::fromValue(tmpVector); - assertEquals("diff_linesToChars:", tmpVarList, dmp.diff_linesToChars("", "alpha\r\nbeta\r\n\r\n\r\n")); - - tmpVector.clear(); - tmpVarList.clear(); - tmpVector.append(""); - tmpVector.append("a"); - tmpVector.append("b"); - tmpVarList << QVariant::fromValue(QString() + QChar((ushort)1)); // (("\u0001")); - tmpVarList << QVariant::fromValue(QString() + QChar((ushort)2)); // (("\u0002")); - tmpVarList << QVariant::fromValue(tmpVector); - assertEquals("diff_linesToChars:", tmpVarList, dmp.diff_linesToChars("a", "b")); - - // More than 256 to reveal any 8-bit limitations. 
- int n = 300; - tmpVector.clear(); - tmpVarList.clear(); - QString lines; - QString chars; - for (int x = 1; x < n + 1; x++) { - tmpVector.append(QString::number(x) + "\n"); - lines += QString::number(x) + "\n"; - chars += QChar(static_cast(x)); - } - assertEquals("diff_linesToChars: More than 256 (setup).", n, tmpVector.size()); - assertEquals("diff_linesToChars: More than 256 (setup).", n, chars.length()); - tmpVector.prepend(""); - tmpVarList << QVariant::fromValue(chars); - tmpVarList << QVariant::fromValue(QString("")); - tmpVarList << QVariant::fromValue(tmpVector); - assertEquals("diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars(lines, "")); + // Convert lines down to characters. + std::wstring_list tmpVector; + std::vector tmpVarList; + tmpVector.push_back(L""); + tmpVector.push_back(L"alpha\n"); + tmpVector.push_back(L"beta\n"); + tmpVarList.push_back(std::dmp_variant(std::wstring() + wchar_t(1) + wchar_t(2) + wchar_t(1))); //((L"\u0001\u0002\u0001")); + tmpVarList.push_back(std::dmp_variant(std::wstring() + wchar_t(2) + wchar_t(1) + wchar_t(2))); // ((L"\u0002\u0001\u0002")); + tmpVarList.push_back(std::dmp_variant(tmpVector)); + assertEquals(L"diff_linesToChars:", tmpVarList, dmp.diff_linesToChars(L"alpha\nbeta\nalpha\n", L"beta\nalpha\nbeta\n")); + + tmpVector.clear(); + tmpVarList.clear(); + tmpVector.push_back(L""); + tmpVector.push_back(L"alpha\r\n"); + tmpVector.push_back(L"beta\r\n"); + tmpVector.push_back(L"\r\n"); + tmpVarList.push_back(std::dmp_variant(std::wstring(L""))); + tmpVarList.push_back(std::dmp_variant(std::wstring() + wchar_t(1) + wchar_t(2) + wchar_t(3) + wchar_t(3))); // ((L"\u0001\u0002\u0003\u0003")); + tmpVarList.push_back(std::dmp_variant(tmpVector)); + assertEquals(L"diff_linesToChars:", tmpVarList, dmp.diff_linesToChars(L"", L"alpha\r\nbeta\r\n\r\n\r\n")); + + tmpVector.clear(); + tmpVarList.clear(); + tmpVector.push_back(L""); + tmpVector.push_back(L"a"); + tmpVector.push_back(L"b"); + 
tmpVarList.push_back(std::dmp_variant(std::wstring() + wchar_t(1))); // ((L"\u0001"))); + tmpVarList.push_back(std::dmp_variant(std::wstring() + wchar_t(2))); // ((L"\u0002")); + tmpVarList.push_back(std::dmp_variant(tmpVector)); + assertEquals(L"diff_linesToChars:", tmpVarList, dmp.diff_linesToChars(L"a", L"b")); + + // More than 256 to reveal any 8-bit limitations. + int n = 300; + tmpVector.clear(); + tmpVarList.clear(); + std::wstring lines; + std::wstring chars; + for (int x = 1; x < n + 1; x++) { + tmpVector.push_back(std::to_wstring(x) + L"\n"); + lines += std::to_wstring(x) + L"\n"; + chars += wchar_t(x); + } + assertEquals(L"diff_linesToChars: More than 256 (setup).", n, tmpVector.size()); + assertEquals(L"diff_linesToChars: More than 256 (setup).", n, chars.length()); + tmpVector.push_back(L""); + tmpVarList.push_back(std::dmp_variant(chars)); + tmpVarList.push_back(std::dmp_variant(std::wstring(L""))); + tmpVarList.push_back(std::dmp_variant(tmpVector)); + assertEquals(L"diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars(lines, L"")); } void diff_match_patch_test::testDiffCharsToLines() { - // First check that Diff equality works. - assertTrue("diff_charsToLines:", Diff(EQUAL, "a") == Diff(EQUAL, "a")); - - assertEquals("diff_charsToLines:", Diff(EQUAL, "a"), Diff(EQUAL, "a")); - - // Convert chars up to lines. - QList diffs; - diffs << Diff(EQUAL, QString() + QChar((ushort)1) + QChar((ushort)2) + QChar((ushort)1)); // ("\u0001\u0002\u0001"); - diffs << Diff(INSERT, QString() + QChar((ushort)2) + QChar((ushort)1) + QChar((ushort)2)); // ("\u0002\u0001\u0002"); - QStringList tmpVector; - tmpVector.append(""); - tmpVector.append("alpha\n"); - tmpVector.append("beta\n"); - dmp.diff_charsToLines(diffs, tmpVector); - assertEquals("diff_charsToLines:", diffList(Diff(EQUAL, "alpha\nbeta\nalpha\n"), Diff(INSERT, "beta\nalpha\nbeta\n")), diffs); - - // More than 256 to reveal any 8-bit limitations. 
- int n = 300; - tmpVector.clear(); - QList tmpVarList; - QString lines; - QString chars; - for (int x = 1; x < n + 1; x++) { - tmpVector.append(QString::number(x) + "\n"); - lines += QString::number(x) + "\n"; - chars += QChar(static_cast(x)); - } - assertEquals("diff_linesToChars: More than 256 (setup).", n, tmpVector.size()); - assertEquals("diff_linesToChars: More than 256 (setup).", n, chars.length()); - tmpVector.prepend(""); - diffs = diffList(Diff(DELETE, chars)); - dmp.diff_charsToLines(diffs, tmpVector); - assertEquals("diff_charsToLines: More than 256.", diffList(Diff(DELETE, lines)), diffs); + // First check that Diff equality works. + assertTrue(L"diff_charsToLines:", Diff(EQUAL, L"a") == Diff(EQUAL, L"a")); + + assertEquals(L"diff_charsToLines:", Diff(EQUAL, L"a"), Diff(EQUAL, L"a")); + + // Convert chars up to lines. + std::vector diffs; + diffs.push_back(Diff(EQUAL, std::wstring() + wchar_t(1) + wchar_t(2) + wchar_t(1))); // (L"\u0001\u0002\u0001"); + diffs.push_back(Diff(INSERT, std::wstring() + wchar_t(2) + wchar_t(1) + wchar_t(2))); // (L"\u0002\u0001\u0002"); + std::wstring_list tmpVector; + tmpVector.push_back(L""); + tmpVector.push_back(L"alpha\n"); + tmpVector.push_back(L"beta\n"); + dmp.diff_charsToLines(diffs, tmpVector); + assertEquals(L"diff_charsToLines:", diffList(Diff(EQUAL, L"alpha\nbeta\nalpha\n"), Diff(INSERT, L"beta\nalpha\nbeta\n")), diffs); + + // More than 256 to reveal any 8-bit limitations. 
+ int n = 300; + tmpVector.clear(); + std::vector tmpVarList; + std::wstring lines; + std::wstring chars; + for (int x = 1; x < n + 1; x++) { + tmpVector.push_back(std::to_wstring(x) + L"\n"); + lines += std::to_wstring(x) + L"\n"; + chars += wchar_t(x); + } + assertEquals(L"diff_linesToChars: More than 256 (setup).", n, tmpVector.size()); + assertEquals(L"diff_linesToChars: More than 256 (setup).", n, chars.length()); + tmpVector.push_back(L""); + diffs = diffList(Diff(DELETE, chars)); + dmp.diff_charsToLines(diffs, tmpVector); + assertEquals(L"diff_charsToLines: More than 256.", diffList(Diff(DELETE, lines)), diffs); } void diff_match_patch_test::testDiffCleanupMerge() { - // Cleanup a messy diff. - QList diffs; - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Null case.", diffList(), diffs); - - diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(INSERT, "c")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: No change case.", diffList(Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(INSERT, "c")), diffs); - - diffs = diffList(Diff(EQUAL, "a"), Diff(EQUAL, "b"), Diff(EQUAL, "c")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Merge equalities.", diffList(Diff(EQUAL, "abc")), diffs); - - diffs = diffList(Diff(DELETE, "a"), Diff(DELETE, "b"), Diff(DELETE, "c")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Merge deletions.", diffList(Diff(DELETE, "abc")), diffs); - - diffs = diffList(Diff(INSERT, "a"), Diff(INSERT, "b"), Diff(INSERT, "c")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Merge insertions.", diffList(Diff(INSERT, "abc")), diffs); - - diffs = diffList(Diff(DELETE, "a"), Diff(INSERT, "b"), Diff(DELETE, "c"), Diff(INSERT, "d"), Diff(EQUAL, "e"), Diff(EQUAL, "f")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Merge interweave.", diffList(Diff(DELETE, "ac"), Diff(INSERT, "bd"), Diff(EQUAL, "ef")), diffs); - - diffs = 
diffList(Diff(DELETE, "a"), Diff(INSERT, "abc"), Diff(DELETE, "dc")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Prefix and suffix detection.", diffList(Diff(EQUAL, "a"), Diff(DELETE, "d"), Diff(INSERT, "b"), Diff(EQUAL, "c")), diffs); - - diffs = diffList(Diff(EQUAL, "x"), Diff(DELETE, "a"), Diff(INSERT, "abc"), Diff(DELETE, "dc"), Diff(EQUAL, "y")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Prefix and suffix detection with equalities.", diffList(Diff(EQUAL, "xa"), Diff(DELETE, "d"), Diff(INSERT, "b"), Diff(EQUAL, "cy")), diffs); - - diffs = diffList(Diff(EQUAL, "a"), Diff(INSERT, "ba"), Diff(EQUAL, "c")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Slide edit left.", diffList(Diff(INSERT, "ab"), Diff(EQUAL, "ac")), diffs); - - diffs = diffList(Diff(EQUAL, "c"), Diff(INSERT, "ab"), Diff(EQUAL, "a")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Slide edit right.", diffList(Diff(EQUAL, "ca"), Diff(INSERT, "ba")), diffs); - - diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(EQUAL, "c"), Diff(DELETE, "ac"), Diff(EQUAL, "x")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Slide edit left recursive.", diffList(Diff(DELETE, "abc"), Diff(EQUAL, "acx")), diffs); - - diffs = diffList(Diff(EQUAL, "x"), Diff(DELETE, "ca"), Diff(EQUAL, "c"), Diff(DELETE, "b"), Diff(EQUAL, "a")); - dmp.diff_cleanupMerge(diffs); - assertEquals("diff_cleanupMerge: Slide edit right recursive.", diffList(Diff(EQUAL, "xca"), Diff(DELETE, "cba")), diffs); + // Cleanup a messy diff. 
+ std::vector diffs; + dmp.diff_cleanupMerge(diffs); + assertEquals(L"diff_cleanupMerge: Null case.", diffList(), diffs); + + diffs = diffList(Diff(EQUAL, L"a"), Diff(DELETE, L"b"), Diff(INSERT, L"c")); + dmp.diff_cleanupMerge(diffs); + assertEquals(L"diff_cleanupMerge: No change case.", diffList(Diff(EQUAL, L"a"), Diff(DELETE, L"b"), Diff(INSERT, L"c")), diffs); + + diffs = diffList(Diff(EQUAL, L"a"), Diff(EQUAL, L"b"), Diff(EQUAL, L"c")); + dmp.diff_cleanupMerge(diffs); + assertEquals(L"diff_cleanupMerge: Merge equalities.", diffList(Diff(EQUAL, L"abc")), diffs); + + diffs = diffList(Diff(DELETE, L"a"), Diff(DELETE, L"b"), Diff(DELETE, L"c")); + dmp.diff_cleanupMerge(diffs); + assertEquals(L"diff_cleanupMerge: Merge deletions.", diffList(Diff(DELETE, L"abc")), diffs); + + diffs = diffList(Diff(INSERT, L"a"), Diff(INSERT, L"b"), Diff(INSERT, L"c")); + dmp.diff_cleanupMerge(diffs); + assertEquals(L"diff_cleanupMerge: Merge insertions.", diffList(Diff(INSERT, L"abc")), diffs); + + diffs = diffList(Diff(DELETE, L"a"), Diff(INSERT, L"b"), Diff(DELETE, L"c"), Diff(INSERT, L"d"), Diff(EQUAL, L"e"), Diff(EQUAL, L"f")); + dmp.diff_cleanupMerge(diffs); + assertEquals(L"diff_cleanupMerge: Merge interweave.", diffList(Diff(DELETE, L"ac"), Diff(INSERT, L"bd"), Diff(EQUAL, L"ef")), diffs); + + diffs = diffList(Diff(DELETE, L"a"), Diff(INSERT, L"abc"), Diff(DELETE, L"dc")); + dmp.diff_cleanupMerge(diffs); + assertEquals(L"diff_cleanupMerge: Prefix and suffix detection.", diffList(Diff(EQUAL, L"a"), Diff(DELETE, L"d"), Diff(INSERT, L"b"), Diff(EQUAL, L"c")), diffs); + + diffs = diffList(Diff(EQUAL, L"x"), Diff(DELETE, L"a"), Diff(INSERT, L"abc"), Diff(DELETE, L"dc"), Diff(EQUAL, L"y")); + dmp.diff_cleanupMerge(diffs); + assertEquals(L"diff_cleanupMerge: Prefix and suffix detection with equalities.", diffList(Diff(EQUAL, L"xa"), Diff(DELETE, L"d"), Diff(INSERT, L"b"), Diff(EQUAL, L"cy")), diffs); + + diffs = diffList(Diff(EQUAL, L"a"), Diff(INSERT, L"ba"), Diff(EQUAL, L"c")); + 
dmp.diff_cleanupMerge(diffs); + assertEquals(L"diff_cleanupMerge: Slide edit left.", diffList(Diff(INSERT, L"ab"), Diff(EQUAL, L"ac")), diffs); + + diffs = diffList(Diff(EQUAL, L"c"), Diff(INSERT, L"ab"), Diff(EQUAL, L"a")); + dmp.diff_cleanupMerge(diffs); + assertEquals(L"diff_cleanupMerge: Slide edit right.", diffList(Diff(EQUAL, L"ca"), Diff(INSERT, L"ba")), diffs); + + diffs = diffList(Diff(EQUAL, L"a"), Diff(DELETE, L"b"), Diff(EQUAL, L"c"), Diff(DELETE, L"ac"), Diff(EQUAL, L"x")); + dmp.diff_cleanupMerge(diffs); + assertEquals(L"diff_cleanupMerge: Slide edit left recursive.", diffList(Diff(DELETE, L"abc"), Diff(EQUAL, L"acx")), diffs); + + diffs = diffList(Diff(EQUAL, L"x"), Diff(DELETE, L"ca"), Diff(EQUAL, L"c"), Diff(DELETE, L"b"), Diff(EQUAL, L"a")); + dmp.diff_cleanupMerge(diffs); + assertEquals(L"diff_cleanupMerge: Slide edit right recursive.", diffList(Diff(EQUAL, L"xca"), Diff(DELETE, L"cba")), diffs); } void diff_match_patch_test::testDiffCleanupSemanticLossless() { - // Slide diffs to match logical boundaries. 
- QList diffs = diffList(); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Null case.", diffList(), diffs); - - diffs = diffList(Diff(EQUAL, "AAA\r\n\r\nBBB"), Diff(INSERT, "\r\nDDD\r\n\r\nBBB"), Diff(EQUAL, "\r\nEEE")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemanticLossless: Blank lines.", diffList(Diff(EQUAL, "AAA\r\n\r\n"), Diff(INSERT, "BBB\r\nDDD\r\n\r\n"), Diff(EQUAL, "BBB\r\nEEE")), diffs); - - diffs = diffList(Diff(EQUAL, "AAA\r\nBBB"), Diff(INSERT, " DDD\r\nBBB"), Diff(EQUAL, " EEE")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemanticLossless: Line boundaries.", diffList(Diff(EQUAL, "AAA\r\n"), Diff(INSERT, "BBB DDD\r\n"), Diff(EQUAL, "BBB EEE")), diffs); - - diffs = diffList(Diff(EQUAL, "The c"), Diff(INSERT, "ow and the c"), Diff(EQUAL, "at.")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Word boundaries.", diffList(Diff(EQUAL, "The "), Diff(INSERT, "cow and the "), Diff(EQUAL, "cat.")), diffs); - - diffs = diffList(Diff(EQUAL, "The-c"), Diff(INSERT, "ow-and-the-c"), Diff(EQUAL, "at.")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Alphanumeric boundaries.", diffList(Diff(EQUAL, "The-"), Diff(INSERT, "cow-and-the-"), Diff(EQUAL, "cat.")), diffs); - - diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "a"), Diff(EQUAL, "ax")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Hitting the start.", diffList(Diff(DELETE, "a"), Diff(EQUAL, "aax")), diffs); - - diffs = diffList(Diff(EQUAL, "xa"), Diff(DELETE, "a"), Diff(EQUAL, "a")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Hitting the end.", diffList(Diff(EQUAL, "xaa"), Diff(DELETE, "a")), diffs); - - diffs = diffList(Diff(EQUAL, "The xxx. The "), Diff(INSERT, "zzz. 
The "), Diff(EQUAL, "yyy.")); - dmp.diff_cleanupSemanticLossless(diffs); - assertEquals("diff_cleanupSemantic: Sentence boundaries.", diffList(Diff(EQUAL, "The xxx."), Diff(INSERT, " The zzz."), Diff(EQUAL, " The yyy.")), diffs); + // Slide diffs to match logical boundaries. + std::vector diffs = diffList(); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals(L"diff_cleanupSemantic: Null case.", diffList(), diffs); + + diffs = diffList(Diff(EQUAL, L"AAA\r\n\r\nBBB"), Diff(INSERT, L"\r\nDDD\r\n\r\nBBB"), Diff(EQUAL, L"\r\nEEE")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals(L"diff_cleanupSemanticLossless: Blank lines.", diffList(Diff(EQUAL, L"AAA\r\n\r\n"), Diff(INSERT, L"BBB\r\nDDD\r\n\r\n"), Diff(EQUAL, L"BBB\r\nEEE")), diffs); + + diffs = diffList(Diff(EQUAL, L"AAA\r\nBBB"), Diff(INSERT, L" DDD\r\nBBB"), Diff(EQUAL, L" EEE")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals(L"diff_cleanupSemanticLossless: Line boundaries.", diffList(Diff(EQUAL, L"AAA\r\n"), Diff(INSERT, L"BBB DDD\r\n"), Diff(EQUAL, L"BBB EEE")), diffs); + + diffs = diffList(Diff(EQUAL, L"The c"), Diff(INSERT, L"ow and the c"), Diff(EQUAL, L"at.")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals(L"diff_cleanupSemantic: Word boundaries.", diffList(Diff(EQUAL, L"The "), Diff(INSERT, L"cow and the "), Diff(EQUAL, L"cat.")), diffs); + + diffs = diffList(Diff(EQUAL, L"The-c"), Diff(INSERT, L"ow-and-the-c"), Diff(EQUAL, L"at.")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals(L"diff_cleanupSemantic: Alphanumeric boundaries.", diffList(Diff(EQUAL, L"The-"), Diff(INSERT, L"cow-and-the-"), Diff(EQUAL, L"cat.")), diffs); + + diffs = diffList(Diff(EQUAL, L"a"), Diff(DELETE, L"a"), Diff(EQUAL, L"ax")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals(L"diff_cleanupSemantic: Hitting the start.", diffList(Diff(DELETE, L"a"), Diff(EQUAL, L"aax")), diffs); + + diffs = diffList(Diff(EQUAL, L"xa"), Diff(DELETE, L"a"), Diff(EQUAL, L"a")); + 
dmp.diff_cleanupSemanticLossless(diffs); + assertEquals(L"diff_cleanupSemantic: Hitting the end.", diffList(Diff(EQUAL, L"xaa"), Diff(DELETE, L"a")), diffs); + + diffs = diffList(Diff(EQUAL, L"The xxx. The "), Diff(INSERT, L"zzz. The "), Diff(EQUAL, L"yyy.")); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals(L"diff_cleanupSemantic: Sentence boundaries.", diffList(Diff(EQUAL, L"The xxx."), Diff(INSERT, L" The zzz."), Diff(EQUAL, L" The yyy.")), diffs); } void diff_match_patch_test::testDiffCleanupSemantic() { - // Cleanup semantically trivial equalities. - QList diffs = diffList(); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Null case.", diffList(), diffs); - - diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "cd"), Diff(EQUAL, "12"), Diff(DELETE, "e")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: No elimination #1.", diffList(Diff(DELETE, "ab"), Diff(INSERT, "cd"), Diff(EQUAL, "12"), Diff(DELETE, "e")), diffs); - - diffs = diffList(Diff(DELETE, "abc"), Diff(INSERT, "ABC"), Diff(EQUAL, "1234"), Diff(DELETE, "wxyz")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: No elimination #2.", diffList(Diff(DELETE, "abc"), Diff(INSERT, "ABC"), Diff(EQUAL, "1234"), Diff(DELETE, "wxyz")), diffs); - - diffs = diffList(Diff(DELETE, "a"), Diff(EQUAL, "b"), Diff(DELETE, "c")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Simple elimination.", diffList(Diff(DELETE, "abc"), Diff(INSERT, "b")), diffs); - - diffs = diffList(Diff(DELETE, "ab"), Diff(EQUAL, "cd"), Diff(DELETE, "e"), Diff(EQUAL, "f"), Diff(INSERT, "g")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Backpass elimination.", diffList(Diff(DELETE, "abcdef"), Diff(INSERT, "cdfg")), diffs); - - diffs = diffList(Diff(INSERT, "1"), Diff(EQUAL, "A"), Diff(DELETE, "B"), Diff(INSERT, "2"), Diff(EQUAL, "_"), Diff(INSERT, "1"), Diff(EQUAL, "A"), Diff(DELETE, "B"), Diff(INSERT, "2")); - 
dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Multiple elimination.", diffList(Diff(DELETE, "AB_AB"), Diff(INSERT, "1A2_1A2")), diffs); - - diffs = diffList(Diff(EQUAL, "The c"), Diff(DELETE, "ow and the c"), Diff(EQUAL, "at.")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Word boundaries.", diffList(Diff(EQUAL, "The "), Diff(DELETE, "cow and the "), Diff(EQUAL, "cat.")), diffs); - - diffs = diffList(Diff(DELETE, "abcxx"), Diff(INSERT, "xxdef")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: No overlap elimination.", diffList(Diff(DELETE, "abcxx"), Diff(INSERT, "xxdef")), diffs); - - diffs = diffList(Diff(DELETE, "abcxxx"), Diff(INSERT, "xxxdef")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Overlap elimination.", diffList(Diff(DELETE, "abc"), Diff(EQUAL, "xxx"), Diff(INSERT, "def")), diffs); - - diffs = diffList(Diff(DELETE, "xxxabc"), Diff(INSERT, "defxxx")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Reverse overlap elimination.", diffList(Diff(INSERT, "def"), Diff(EQUAL, "xxx"), Diff(DELETE, "abc")), diffs); - - diffs = diffList(Diff(DELETE, "abcd1212"), Diff(INSERT, "1212efghi"), Diff(EQUAL, "----"), Diff(DELETE, "A3"), Diff(INSERT, "3BC")); - dmp.diff_cleanupSemantic(diffs); - assertEquals("diff_cleanupSemantic: Two overlap eliminations.", diffList(Diff(DELETE, "abcd"), Diff(EQUAL, "1212"), Diff(INSERT, "efghi"), Diff(EQUAL, "----"), Diff(DELETE, "A"), Diff(EQUAL, "3"), Diff(INSERT, "BC")), diffs); + // Cleanup semantically trivial equalities. 
+ std::vector diffs = diffList(); + dmp.diff_cleanupSemantic(diffs); + assertEquals(L"diff_cleanupSemantic: Null case.", diffList(), diffs); + + diffs = diffList(Diff(DELETE, L"ab"), Diff(INSERT, L"cd"), Diff(EQUAL, L"12"), Diff(DELETE, L"e")); + dmp.diff_cleanupSemantic(diffs); + assertEquals(L"diff_cleanupSemantic: No elimination #1.", diffList(Diff(DELETE, L"ab"), Diff(INSERT, L"cd"), Diff(EQUAL, L"12"), Diff(DELETE, L"e")), diffs); + + diffs = diffList(Diff(DELETE, L"abc"), Diff(INSERT, L"ABC"), Diff(EQUAL, L"1234"), Diff(DELETE, L"wxyz")); + dmp.diff_cleanupSemantic(diffs); + assertEquals(L"diff_cleanupSemantic: No elimination #2.", diffList(Diff(DELETE, L"abc"), Diff(INSERT, L"ABC"), Diff(EQUAL, L"1234"), Diff(DELETE, L"wxyz")), diffs); + + diffs = diffList(Diff(DELETE, L"a"), Diff(EQUAL, L"b"), Diff(DELETE, L"c")); + dmp.diff_cleanupSemantic(diffs); + assertEquals(L"diff_cleanupSemantic: Simple elimination.", diffList(Diff(DELETE, L"abc"), Diff(INSERT, L"b")), diffs); + + diffs = diffList(Diff(DELETE, L"ab"), Diff(EQUAL, L"cd"), Diff(DELETE, L"e"), Diff(EQUAL, L"f"), Diff(INSERT, L"g")); + dmp.diff_cleanupSemantic(diffs); + assertEquals(L"diff_cleanupSemantic: Backpass elimination.", diffList(Diff(DELETE, L"abcdef"), Diff(INSERT, L"cdfg")), diffs); + + diffs = diffList(Diff(INSERT, L"1"), Diff(EQUAL, L"A"), Diff(DELETE, L"B"), Diff(INSERT, L"2"), Diff(EQUAL, L"_"), Diff(INSERT, L"1"), Diff(EQUAL, L"A"), Diff(DELETE, L"B"), Diff(INSERT, L"2")); + dmp.diff_cleanupSemantic(diffs); + assertEquals(L"diff_cleanupSemantic: Multiple elimination.", diffList(Diff(DELETE, L"AB_AB"), Diff(INSERT, L"1A2_1A2")), diffs); + + diffs = diffList(Diff(EQUAL, L"The c"), Diff(DELETE, L"ow and the c"), Diff(EQUAL, L"at.")); + dmp.diff_cleanupSemantic(diffs); + assertEquals(L"diff_cleanupSemantic: Word boundaries.", diffList(Diff(EQUAL, L"The "), Diff(DELETE, L"cow and the "), Diff(EQUAL, L"cat.")), diffs); + + diffs = diffList(Diff(DELETE, L"abcxx"), Diff(INSERT, L"xxdef")); + 
dmp.diff_cleanupSemantic(diffs); + assertEquals(L"diff_cleanupSemantic: No overlap elimination.", diffList(Diff(DELETE, L"abcxx"), Diff(INSERT, L"xxdef")), diffs); + + diffs = diffList(Diff(DELETE, L"abcxxx"), Diff(INSERT, L"xxxdef")); + dmp.diff_cleanupSemantic(diffs); + assertEquals(L"diff_cleanupSemantic: Overlap elimination.", diffList(Diff(DELETE, L"abc"), Diff(EQUAL, L"xxx"), Diff(INSERT, L"def")), diffs); + + diffs = diffList(Diff(DELETE, L"xxxabc"), Diff(INSERT, L"defxxx")); + dmp.diff_cleanupSemantic(diffs); + assertEquals(L"diff_cleanupSemantic: Reverse overlap elimination.", diffList(Diff(INSERT, L"def"), Diff(EQUAL, L"xxx"), Diff(DELETE, L"abc")), diffs); + + diffs = diffList(Diff(DELETE, L"abcd1212"), Diff(INSERT, L"1212efghi"), Diff(EQUAL, L"----"), Diff(DELETE, L"A3"), Diff(INSERT, L"3BC")); + dmp.diff_cleanupSemantic(diffs); + assertEquals(L"diff_cleanupSemantic: Two overlap eliminations.", diffList(Diff(DELETE, L"abcd"), Diff(EQUAL, L"1212"), Diff(INSERT, L"efghi"), Diff(EQUAL, L"----"), Diff(DELETE, L"A"), Diff(EQUAL, L"3"), Diff(INSERT, L"BC")), diffs); } void diff_match_patch_test::testDiffCleanupEfficiency() { - // Cleanup operationally trivial equalities. 
- dmp.Diff_EditCost = 4; - QList diffs = diffList(); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: Null case.", diffList(), diffs); - - diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), Diff(DELETE, "cd"), Diff(INSERT, "34")); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: No elimination.", diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), Diff(DELETE, "cd"), Diff(INSERT, "34")), diffs); - - diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "xyz"), Diff(DELETE, "cd"), Diff(INSERT, "34")); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: Four-edit elimination.", diffList(Diff(DELETE, "abxyzcd"), Diff(INSERT, "12xyz34")), diffs); - - diffs = diffList(Diff(INSERT, "12"), Diff(EQUAL, "x"), Diff(DELETE, "cd"), Diff(INSERT, "34")); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: Three-edit elimination.", diffList(Diff(DELETE, "xcd"), Diff(INSERT, "12x34")), diffs); - - diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "xy"), Diff(INSERT, "34"), Diff(EQUAL, "z"), Diff(DELETE, "cd"), Diff(INSERT, "56")); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: Backpass elimination.", diffList(Diff(DELETE, "abxyzcd"), Diff(INSERT, "12xy34z56")), diffs); - - dmp.Diff_EditCost = 5; - diffs = diffList(Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), Diff(DELETE, "cd"), Diff(INSERT, "34")); - dmp.diff_cleanupEfficiency(diffs); - assertEquals("diff_cleanupEfficiency: High cost elimination.", diffList(Diff(DELETE, "abwxyzcd"), Diff(INSERT, "12wxyz34")), diffs); - dmp.Diff_EditCost = 4; + // Cleanup operationally trivial equalities. 
+ dmp.Diff_EditCost = 4; + std::vector diffs = diffList(); + dmp.diff_cleanupEfficiency(diffs); + assertEquals(L"diff_cleanupEfficiency: Null case.", diffList(), diffs); + + diffs = diffList(Diff(DELETE, L"ab"), Diff(INSERT, L"12"), Diff(EQUAL, L"wxyz"), Diff(DELETE, L"cd"), Diff(INSERT, L"34")); + dmp.diff_cleanupEfficiency(diffs); + assertEquals(L"diff_cleanupEfficiency: No elimination.", diffList(Diff(DELETE, L"ab"), Diff(INSERT, L"12"), Diff(EQUAL, L"wxyz"), Diff(DELETE, L"cd"), Diff(INSERT, L"34")), diffs); + + diffs = diffList(Diff(DELETE, L"ab"), Diff(INSERT, L"12"), Diff(EQUAL, L"xyz"), Diff(DELETE, L"cd"), Diff(INSERT, L"34")); + dmp.diff_cleanupEfficiency(diffs); + assertEquals(L"diff_cleanupEfficiency: Four-edit elimination.", diffList(Diff(DELETE, L"abxyzcd"), Diff(INSERT, L"12xyz34")), diffs); + + diffs = diffList(Diff(INSERT, L"12"), Diff(EQUAL, L"x"), Diff(DELETE, L"cd"), Diff(INSERT, L"34")); + dmp.diff_cleanupEfficiency(diffs); + assertEquals(L"diff_cleanupEfficiency: Three-edit elimination.", diffList(Diff(DELETE, L"xcd"), Diff(INSERT, L"12x34")), diffs); + + diffs = diffList(Diff(DELETE, L"ab"), Diff(INSERT, L"12"), Diff(EQUAL, L"xy"), Diff(INSERT, L"34"), Diff(EQUAL, L"z"), Diff(DELETE, L"cd"), Diff(INSERT, L"56")); + dmp.diff_cleanupEfficiency(diffs); + assertEquals(L"diff_cleanupEfficiency: Backpass elimination.", diffList(Diff(DELETE, L"abxyzcd"), Diff(INSERT, L"12xy34z56")), diffs); + + dmp.Diff_EditCost = 5; + diffs = diffList(Diff(DELETE, L"ab"), Diff(INSERT, L"12"), Diff(EQUAL, L"wxyz"), Diff(DELETE, L"cd"), Diff(INSERT, L"34")); + dmp.diff_cleanupEfficiency(diffs); + assertEquals(L"diff_cleanupEfficiency: High cost elimination.", diffList(Diff(DELETE, L"abwxyzcd"), Diff(INSERT, L"12wxyz34")), diffs); + dmp.Diff_EditCost = 4; } void diff_match_patch_test::testDiffPrettyHtml() { - // Pretty print. - QList diffs = diffList(Diff(EQUAL, "a\n"), Diff(DELETE, "b"), Diff(INSERT, "c&d")); - assertEquals("diff_prettyHtml:", "
<B>b</B>c&d", dmp.diff_prettyHtml(diffs)); + // Pretty print. + std::vector diffs = diffList(Diff(EQUAL, L"a\n"), Diff(DELETE, L"b"), Diff(INSERT, L"c&d")); + assertEquals(L"diff_prettyHtml:", L"
<B>b</B>c&d", dmp.diff_prettyHtml(diffs)); } void diff_match_patch_test::testDiffText() { - // Compute the source and destination texts. - QList diffs = diffList(Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), Diff(EQUAL, " over "), Diff(DELETE, "the"), Diff(INSERT, "a"), Diff(EQUAL, " lazy")); - assertEquals("diff_text1:", "jumps over the lazy", dmp.diff_text1(diffs)); - assertEquals("diff_text2:", "jumped over a lazy", dmp.diff_text2(diffs)); + // Compute the source and destination texts. + std::vector diffs = diffList(Diff(EQUAL, L"jump"), Diff(DELETE, L"s"), Diff(INSERT, L"ed"), Diff(EQUAL, L" over "), Diff(DELETE, L"the"), Diff(INSERT, L"a"), Diff(EQUAL, L" lazy")); + assertEquals(L"diff_text1:", L"jumps over the lazy", dmp.diff_text1(diffs)); + assertEquals(L"diff_text2:", L"jumped over a lazy", dmp.diff_text2(diffs)); } void diff_match_patch_test::testDiffDelta() { - // Convert a diff into delta string. - QList diffs = diffList(Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), Diff(EQUAL, " over "), Diff(DELETE, "the"), Diff(INSERT, "a"), Diff(EQUAL, " lazy"), Diff(INSERT, "old dog")); - QString text1 = dmp.diff_text1(diffs); - assertEquals("diff_text1: Base text.", "jumps over the lazy", text1); - - QString delta = dmp.diff_toDelta(diffs); - assertEquals("diff_toDelta:", "=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta); - - // Convert delta string into a diff. - assertEquals("diff_fromDelta: Normal.", diffs, dmp.diff_fromDelta(text1, delta)); - - // Generates error (19 < 20). - try { - dmp.diff_fromDelta(text1 + "x", delta); - assertFalse("diff_fromDelta: Too long.", true); - } catch (QString ex) { - // Exception expected. - } - - // Generates error (19 > 18). - try { - dmp.diff_fromDelta(text1.mid(1), delta); - assertFalse("diff_fromDelta: Too short.", true); - } catch (QString ex) { - // Exception expected. - } - - // Generates error (%c3%xy invalid Unicode). - /* This test does not work because QUrl::fromPercentEncoding("%xy") -> "?" 
- try { - dmp.diff_fromDelta("", "+%c3%xy"); - assertFalse("diff_fromDelta: Invalid character.", true); - } catch (QString ex) { - // Exception expected. - } - */ - - // Test deltas with special characters. - diffs = diffList(Diff(EQUAL, QString::fromWCharArray((const wchar_t*) L"\u0680 \000 \t %", 7)), Diff(DELETE, QString::fromWCharArray((const wchar_t*) L"\u0681 \001 \n ^", 7)), Diff(INSERT, QString::fromWCharArray((const wchar_t*) L"\u0682 \002 \\ |", 7))); - text1 = dmp.diff_text1(diffs); - assertEquals("diff_text1: Unicode text.", QString::fromWCharArray((const wchar_t*) L"\u0680 \000 \t %\u0681 \001 \n ^", 14), text1); - - delta = dmp.diff_toDelta(diffs); - assertEquals("diff_toDelta: Unicode.", "=7\t-7\t+%DA%82 %02 %5C %7C", delta); - - assertEquals("diff_fromDelta: Unicode.", diffs, dmp.diff_fromDelta(text1, delta)); - - // Verify pool of unchanged characters. - diffs = diffList(Diff(INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ")); - QString text2 = dmp.diff_text2(diffs); - assertEquals("diff_text2: Unchanged characters.", "A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2); - - delta = dmp.diff_toDelta(diffs); - assertEquals("diff_toDelta: Unchanged characters.", "+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", delta); - - // Convert delta string into a diff. - assertEquals("diff_fromDelta: Unchanged characters.", diffs, dmp.diff_fromDelta("", delta)); + // Convert a diff into delta string. + std::vector diffs = diffList(Diff(EQUAL, L"jump"), Diff(DELETE, L"s"), Diff(INSERT, L"ed"), Diff(EQUAL, L" over "), Diff(DELETE, L"the"), Diff(INSERT, L"a"), Diff(EQUAL, L" lazy"), Diff(INSERT, L"old dog")); + std::wstring text1 = dmp.diff_text1(diffs); + assertEquals(L"diff_text1: Base text.", L"jumps over the lazy", text1); + + std::wstring delta = dmp.diff_toDelta(diffs); + assertEquals(L"diff_toDelta:", L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", delta); + + // Convert delta string into a diff. 
+ assertEquals(L"diff_fromDelta: Normal.", diffs, dmp.diff_fromDelta(text1, delta)); + + // Generates error (19 < 20). + try { + dmp.diff_fromDelta(text1 + std::wstring(L"x"), delta); + assertFalse(L"diff_fromDelta: Too long.", true); + } catch (std::wstring ex) { + // Exception expected. + } + + // Generates error (19 > 18). + try { + dmp.diff_fromDelta(text1.substr(1), delta); + assertFalse(L"diff_fromDelta: Too short.", true); + } catch (std::wstring ex) { + // Exception expected. + } + + // Generates error (%c3%xy invalid Unicode). + /* This test does not work because QUrl::fromPercentEncoding(L"%xy") -> "?" + try { + dmp.diff_fromDelta(L"", L"+%c3%xy"); + assertFalse(L"diff_fromDelta: Invalid character.", true); + } catch (std::wstring ex) { + // Exception expected. + } + */ + + // Test deltas with special characters. + diffs = diffList(Diff(EQUAL, std::wstring(L"\u0680 \000 \t %", 7)), Diff(DELETE, std::wstring(L"\u0681 \001 \n ^", 7)), Diff(INSERT, std::wstring(L"\u0682 \002 \\ |", 7))); + text1 = dmp.diff_text1(diffs); + assertEquals(L"diff_text1: Unicode text.", std::wstring(L"\u0680 \000 \t %\u0681 \001 \n ^", 14), text1); + + delta = dmp.diff_toDelta(diffs); + assertEquals(L"diff_toDelta: Unicode.", L"=7\t-7\t+%DA%82 %02 %5C %7C", delta); + + assertEquals(L"diff_fromDelta: Unicode.", diffs, dmp.diff_fromDelta(text1, delta)); + + // Verify pool of unchanged characters. + diffs = diffList(Diff(INSERT, L"A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ")); + std::wstring text2 = dmp.diff_text2(diffs); + assertEquals(L"diff_text2: Unchanged characters.", L"A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2); + + delta = dmp.diff_toDelta(diffs); + assertEquals(L"diff_toDelta: Unchanged characters.", L"+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", delta); + + // Convert delta string into a diff. 
+ assertEquals(L"diff_fromDelta: Unchanged characters.", diffs, dmp.diff_fromDelta(L"", delta)); } void diff_match_patch_test::testDiffXIndex() { - // Translate a location in text1 to text2. - QList diffs = diffList(Diff(DELETE, "a"), Diff(INSERT, "1234"), Diff(EQUAL, "xyz")); - assertEquals("diff_xIndex: Translation on equality.", 5, dmp.diff_xIndex(diffs, 2)); + // Translate a location in text1 to text2. + std::vector diffs = diffList(Diff(DELETE, L"a"), Diff(INSERT, L"1234"), Diff(EQUAL, L"xyz")); + assertEquals(L"diff_xIndex: Translation on equality.", 5, dmp.diff_xIndex(diffs, 2)); - diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "1234"), Diff(EQUAL, "xyz")); - assertEquals("diff_xIndex: Translation on deletion.", 1, dmp.diff_xIndex(diffs, 3)); + diffs = diffList(Diff(EQUAL, L"a"), Diff(DELETE, L"1234"), Diff(EQUAL, L"xyz")); + assertEquals(L"diff_xIndex: Translation on deletion.", 1, dmp.diff_xIndex(diffs, 3)); } void diff_match_patch_test::testDiffLevenshtein() { - QList diffs = diffList(Diff(DELETE, "abc"), Diff(INSERT, "1234"), Diff(EQUAL, "xyz")); - assertEquals("diff_levenshtein: Trailing equality.", 4, dmp.diff_levenshtein(diffs)); + std::vector diffs = diffList(Diff(DELETE, L"abc"), Diff(INSERT, L"1234"), Diff(EQUAL, L"xyz")); + assertEquals(L"diff_levenshtein: Trailing equality.", 4, dmp.diff_levenshtein(diffs)); - diffs = diffList(Diff(EQUAL, "xyz"), Diff(DELETE, "abc"), Diff(INSERT, "1234")); - assertEquals("diff_levenshtein: Leading equality.", 4, dmp.diff_levenshtein(diffs)); + diffs = diffList(Diff(EQUAL, L"xyz"), Diff(DELETE, L"abc"), Diff(INSERT, L"1234")); + assertEquals(L"diff_levenshtein: Leading equality.", 4, dmp.diff_levenshtein(diffs)); - diffs = diffList(Diff(DELETE, "abc"), Diff(EQUAL, "xyz"), Diff(INSERT, "1234")); - assertEquals("diff_levenshtein: Middle equality.", 7, dmp.diff_levenshtein(diffs)); + diffs = diffList(Diff(DELETE, L"abc"), Diff(EQUAL, L"xyz"), Diff(INSERT, L"1234")); + assertEquals(L"diff_levenshtein: Middle 
equality.", 7, dmp.diff_levenshtein(diffs)); } void diff_match_patch_test::testDiffBisect() { - // Normal. - QString a = "cat"; - QString b = "map"; - // Since the resulting diff hasn't been normalized, it would be ok if - // the insertion and deletion pairs are swapped. - // If the order changes, tweak this test as required. - QList diffs = diffList(Diff(DELETE, "c"), Diff(INSERT, "m"), Diff(EQUAL, "a"), Diff(DELETE, "t"), Diff(INSERT, "p")); - assertEquals("diff_bisect: Normal.", diffs, dmp.diff_bisect(a, b, std::numeric_limits::max())); - - // Timeout. - diffs = diffList(Diff(DELETE, "cat"), Diff(INSERT, "map")); - assertEquals("diff_bisect: Timeout.", diffs, dmp.diff_bisect(a, b, 0)); + // Normal. + std::wstring a = L"cat"; + std::wstring b = L"map"; + // Since the resulting diff hasn't been normalized, it would be ok if + // the insertion and deletion pairs are swapped. + // If the order changes, tweak this test as required. + std::vector diffs = diffList(Diff(DELETE, L"c"), Diff(INSERT, L"m"), Diff(EQUAL, L"a"), Diff(DELETE, L"t"), Diff(INSERT, L"p")); + assertEquals(L"diff_bisect: Normal.", diffs, dmp.diff_bisect(a, b, std::numeric_limits::max())); + + // Timeout. + diffs = diffList(Diff(DELETE, L"cat"), Diff(INSERT, L"map")); + assertEquals(L"diff_bisect: Timeout.", diffs, dmp.diff_bisect(a, b, 0)); } void diff_match_patch_test::testDiffMain() { - // Perform a trivial diff. 
- QList diffs = diffList(); - assertEquals("diff_main: Null case.", diffs, dmp.diff_main("", "", false)); - - diffs = diffList(Diff(EQUAL, "abc")); - assertEquals("diff_main: Equality.", diffs, dmp.diff_main("abc", "abc", false)); - - diffs = diffList(Diff(EQUAL, "ab"), Diff(INSERT, "123"), Diff(EQUAL, "c")); - assertEquals("diff_main: Simple insertion.", diffs, dmp.diff_main("abc", "ab123c", false)); - - diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "123"), Diff(EQUAL, "bc")); - assertEquals("diff_main: Simple deletion.", diffs, dmp.diff_main("a123bc", "abc", false)); - - diffs = diffList(Diff(EQUAL, "a"), Diff(INSERT, "123"), Diff(EQUAL, "b"), Diff(INSERT, "456"), Diff(EQUAL, "c")); - assertEquals("diff_main: Two insertions.", diffs, dmp.diff_main("abc", "a123b456c", false)); - - diffs = diffList(Diff(EQUAL, "a"), Diff(DELETE, "123"), Diff(EQUAL, "b"), Diff(DELETE, "456"), Diff(EQUAL, "c")); - assertEquals("diff_main: Two deletions.", diffs, dmp.diff_main("a123b456c", "abc", false)); - - // Perform a real diff. - // Switch off the timeout. 
- dmp.Diff_Timeout = 0; - diffs = diffList(Diff(DELETE, "a"), Diff(INSERT, "b")); - assertEquals("diff_main: Simple case #1.", diffs, dmp.diff_main("a", "b", false)); - - diffs = diffList(Diff(DELETE, "Apple"), Diff(INSERT, "Banana"), Diff(EQUAL, "s are a"), Diff(INSERT, "lso"), Diff(EQUAL, " fruit.")); - assertEquals("diff_main: Simple case #2.", diffs, dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.", false)); - - diffs = diffList(Diff(DELETE, "a"), Diff(INSERT, QString::fromWCharArray((const wchar_t*) L"\u0680", 1)), Diff(EQUAL, "x"), Diff(DELETE, "\t"), Diff(INSERT, QString::fromWCharArray((const wchar_t*) L"\000", 1))); - assertEquals("diff_main: Simple case #3.", diffs, dmp.diff_main("ax\t", QString::fromWCharArray((const wchar_t*) L"\u0680x\000", 3), false)); - - diffs = diffList(Diff(DELETE, "1"), Diff(EQUAL, "a"), Diff(DELETE, "y"), Diff(EQUAL, "b"), Diff(DELETE, "2"), Diff(INSERT, "xab")); - assertEquals("diff_main: Overlap #1.", diffs, dmp.diff_main("1ayb2", "abxab", false)); - - diffs = diffList(Diff(INSERT, "xaxcx"), Diff(EQUAL, "abc"), Diff(DELETE, "y")); - assertEquals("diff_main: Overlap #2.", diffs, dmp.diff_main("abcy", "xaxcxabc", false)); - - diffs = diffList(Diff(DELETE, "ABCD"), Diff(EQUAL, "a"), Diff(DELETE, "="), Diff(INSERT, "-"), Diff(EQUAL, "bcd"), Diff(DELETE, "="), Diff(INSERT, "-"), Diff(EQUAL, "efghijklmnopqrs"), Diff(DELETE, "EFGHIJKLMNOefg")); - assertEquals("diff_main: Overlap #3.", diffs, dmp.diff_main("ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", "a-bcd-efghijklmnopqrs", false)); - - diffs = diffList(Diff(INSERT, " "), Diff(EQUAL, "a"), Diff(INSERT, "nd"), Diff(EQUAL, " [[Pennsylvania]]"), Diff(DELETE, " and [[New")); - assertEquals("diff_main: Large equality.", diffs, dmp.diff_main("a [[Pennsylvania]] and [[New", " and [[Pennsylvania]]", false)); - - dmp.Diff_Timeout = 0.1f; // 100ms - // This test may 'fail' on extremely fast computers. If so, just increase the text lengths. 
- QString a = "`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; - QString b = "I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; - // Increase the text lengths by 1024 times to ensure a timeout. - for (int x = 0; x < 10; x++) { - a = a + a; - b = b + b; - } - clock_t startTime = clock(); - dmp.diff_main(a, b); - clock_t endTime = clock(); - // Test that we took at least the timeout period. - assertTrue("diff_main: Timeout min.", dmp.Diff_Timeout * CLOCKS_PER_SEC <= endTime - startTime); - // Test that we didn't take forever (be forgiving). - // Theoretically this test could fail very occasionally if the - // OS task swaps or locks up for a second at the wrong moment. - // Java seems to overrun by ~80% (compared with 10% for other languages). - // Therefore use an upper limit of 0.5s instead of 0.2s. - assertTrue("diff_main: Timeout max.", dmp.Diff_Timeout * CLOCKS_PER_SEC * 2 > endTime - startTime); - dmp.Diff_Timeout = 0; - - // Test the linemode speedup. - // Must be long to pass the 100 char cutoff. 
- a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; - b = "abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; - assertEquals("diff_main: Simple line-mode.", dmp.diff_main(a, b, true), dmp.diff_main(a, b, false)); - - a = "1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; - b = "abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; - assertEquals("diff_main: Single line-mode.", dmp.diff_main(a, b, true), dmp.diff_main(a, b, false)); - - a = "1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; - b = "abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; - QStringList texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true)); - QStringList texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false)); - assertEquals("diff_main: Overlap line-mode.", texts_textmode, texts_linemode); - - // Test null inputs. - try { - dmp.diff_main(NULL, NULL); - assertFalse("diff_main: Null inputs.", true); - } catch (const char* ex) { - // Exception expected. - } + // Perform a trivial diff. 
+ std::vector diffs = diffList(); + assertEquals(L"diff_main: Null case.", diffs, dmp.diff_main(L"", L"", false)); + + diffs = diffList(Diff(EQUAL, L"abc")); + assertEquals(L"diff_main: Equality.", diffs, dmp.diff_main(L"abc", L"abc", false)); + + diffs = diffList(Diff(EQUAL, L"ab"), Diff(INSERT, L"123"), Diff(EQUAL, L"c")); + assertEquals(L"diff_main: Simple insertion.", diffs, dmp.diff_main(L"abc", L"ab123c", false)); + + diffs = diffList(Diff(EQUAL, L"a"), Diff(DELETE, L"123"), Diff(EQUAL, L"bc")); + assertEquals(L"diff_main: Simple deletion.", diffs, dmp.diff_main(L"a123bc", L"abc", false)); + + diffs = diffList(Diff(EQUAL, L"a"), Diff(INSERT, L"123"), Diff(EQUAL, L"b"), Diff(INSERT, L"456"), Diff(EQUAL, L"c")); + assertEquals(L"diff_main: Two insertions.", diffs, dmp.diff_main(L"abc", L"a123b456c", false)); + + diffs = diffList(Diff(EQUAL, L"a"), Diff(DELETE, L"123"), Diff(EQUAL, L"b"), Diff(DELETE, L"456"), Diff(EQUAL, L"c")); + assertEquals(L"diff_main: Two deletions.", diffs, dmp.diff_main(L"a123b456c", L"abc", false)); + + // Perform a real diff. + // Switch off the timeout. 
+ dmp.Diff_Timeout = 0; + diffs = diffList(Diff(DELETE, L"a"), Diff(INSERT, L"b")); + assertEquals(L"diff_main: Simple case #1.", diffs, dmp.diff_main(L"a", L"b", false)); + + diffs = diffList(Diff(DELETE, L"Apple"), Diff(INSERT, L"Banana"), Diff(EQUAL, L"s are a"), Diff(INSERT, L"lso"), Diff(EQUAL, L" fruit.")); + assertEquals(L"diff_main: Simple case #2.", diffs, dmp.diff_main(L"Apples are a fruit.", L"Bananas are also fruit.", false)); + + diffs = diffList(Diff(DELETE, L"a"), Diff(INSERT, std::wstring(L"\u0680", 1)), Diff(EQUAL, L"x"), Diff(DELETE, L"\t"), Diff(INSERT, std::wstring(L"\000", 1))); + assertEquals(L"diff_main: Simple case #3.", diffs, dmp.diff_main(L"ax\t", std::wstring(L"\u0680x\000", 3), false)); + + diffs = diffList(Diff(DELETE, L"1"), Diff(EQUAL, L"a"), Diff(DELETE, L"y"), Diff(EQUAL, L"b"), Diff(DELETE, L"2"), Diff(INSERT, L"xab")); + assertEquals(L"diff_main: Overlap #1.", diffs, dmp.diff_main(L"1ayb2", L"abxab", false)); + + diffs = diffList(Diff(INSERT, L"xaxcx"), Diff(EQUAL, L"abc"), Diff(DELETE, L"y")); + assertEquals(L"diff_main: Overlap #2.", diffs, dmp.diff_main(L"abcy", L"xaxcxabc", false)); + + diffs = diffList(Diff(DELETE, L"ABCD"), Diff(EQUAL, L"a"), Diff(DELETE, L"="), Diff(INSERT, L"-"), Diff(EQUAL, L"bcd"), Diff(DELETE, L"="), Diff(INSERT, L"-"), Diff(EQUAL, L"efghijklmnopqrs"), Diff(DELETE, L"EFGHIJKLMNOefg")); + assertEquals(L"diff_main: Overlap #3.", diffs, dmp.diff_main(L"ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", L"a-bcd-efghijklmnopqrs", false)); + + diffs = diffList(Diff(INSERT, L" "), Diff(EQUAL, L"a"), Diff(INSERT, L"nd"), Diff(EQUAL, L" [[Pennsylvania]]"), Diff(DELETE, L" and [[New")); + assertEquals(L"diff_main: Large equality.", diffs, dmp.diff_main(L"a [[Pennsylvania]] and [[New", L" and [[Pennsylvania]]", false)); + + dmp.Diff_Timeout = 0.1f; // 100ms + // This test may 'fail' on extremely fast computers. If so, just increase the text lengths. 
+ std::wstring a = L"`Twas brillig, and the slithy toves\nDid gyre and gimble in the wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; + std::wstring b = L"I am the very model of a modern major general,\nI've information vegetable, animal, and mineral,\nI know the kings of England, and I quote the fights historical,\nFrom Marathon to Waterloo, in order categorical.\n"; + // Increase the text lengths by 1024 times to ensure a timeout. + for (int x = 0; x < 10; x++) { + a = a + a; + b = b + b; + } + clock_t startTime = clock(); + dmp.diff_main(a, b); + clock_t endTime = clock(); + // Test that we took at least the timeout period. + assertTrue(L"diff_main: Timeout min.", dmp.Diff_Timeout * CLOCKS_PER_SEC <= endTime - startTime); + // Test that we didn't take forever (be forgiving). + // Theoretically this test could fail very occasionally if the + // OS task swaps or locks up for a second at the wrong moment. + // Java seems to overrun by ~80% (compared with 10% for other languages). + // Therefore use an upper limit of 0.5s instead of 0.2s. + assertTrue(L"diff_main: Timeout max.", dmp.Diff_Timeout * CLOCKS_PER_SEC * 2 > endTime - startTime); + dmp.Diff_Timeout = 0; + + // Test the linemode speedup. + // Must be long to pass the 100 char cutoff. 
+ a = L"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + b = L"abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\n"; + assertEquals(L"diff_main: Simple line-mode.", dmp.diff_main(a, b, true), dmp.diff_main(a, b, false)); + + a = L"1234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890"; + b = L"abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; + assertEquals(L"diff_main: Single line-mode.", dmp.diff_main(a, b, true), dmp.diff_main(a, b, false)); + + a = L"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n"; + b = L"abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n"; + std::wstring_list texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true)); + std::wstring_list texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false)); + assertEquals(L"diff_main: Overlap line-mode.", texts_textmode, texts_linemode); + + // Test null inputs. + try { + dmp.diff_main(NULL, NULL); + assertFalse(L"diff_main: Null inputs.", true); + } catch (const char* ex) { + // Exception expected. + } } @@ -595,86 +596,86 @@ void diff_match_patch_test::testDiffMain() { void diff_match_patch_test::testMatchAlphabet() { - // Initialise the bitmasks for Bitap. 
- QMap bitmask; - bitmask.insert('a', 4); - bitmask.insert('b', 2); - bitmask.insert('c', 1); - assertEquals("match_alphabet: Unique.", bitmask, dmp.match_alphabet("abc")); - - bitmask = QMap(); - bitmask.insert('a', 37); - bitmask.insert('b', 18); - bitmask.insert('c', 8); - assertEquals("match_alphabet: Duplicates.", bitmask, dmp.match_alphabet("abcaba")); + // Initialise the bitmasks for Bitap. + std::unordered_map bitmask; + bitmask.emplace('a', 4); + bitmask.emplace('b', 2); + bitmask.emplace('c', 1); + assertEquals(L"match_alphabet: Unique.", bitmask, dmp.match_alphabet(L"abc")); + + bitmask.clear(); + bitmask.emplace('a', 37); + bitmask.emplace('b', 18); + bitmask.emplace('c', 8); + assertEquals(L"match_alphabet: Duplicates.", bitmask, dmp.match_alphabet(L"abcaba")); } void diff_match_patch_test::testMatchBitap() { - // Bitap algorithm. - dmp.Match_Distance = 100; - dmp.Match_Threshold = 0.5f; - assertEquals("match_bitap: Exact match #1.", 5, dmp.match_bitap("abcdefghijk", "fgh", 5)); + // Bitap algorithm. 
+ dmp.Match_Distance = 100; + dmp.Match_Threshold = 0.5f; + assertEquals(L"match_bitap: Exact match #1.", 5, dmp.match_bitap(L"abcdefghijk", L"fgh", 5)); - assertEquals("match_bitap: Exact match #2.", 5, dmp.match_bitap("abcdefghijk", "fgh", 0)); + assertEquals(L"match_bitap: Exact match #2.", 5, dmp.match_bitap(L"abcdefghijk", L"fgh", 0)); - assertEquals("match_bitap: Fuzzy match #1.", 4, dmp.match_bitap("abcdefghijk", "efxhi", 0)); + assertEquals(L"match_bitap: Fuzzy match #1.", 4, dmp.match_bitap(L"abcdefghijk", L"efxhi", 0)); - assertEquals("match_bitap: Fuzzy match #2.", 2, dmp.match_bitap("abcdefghijk", "cdefxyhijk", 5)); + assertEquals(L"match_bitap: Fuzzy match #2.", 2, dmp.match_bitap(L"abcdefghijk", L"cdefxyhijk", 5)); - assertEquals("match_bitap: Fuzzy match #3.", -1, dmp.match_bitap("abcdefghijk", "bxy", 1)); + assertEquals(L"match_bitap: Fuzzy match #3.", -1, dmp.match_bitap(L"abcdefghijk", L"bxy", 1)); - assertEquals("match_bitap: Overflow.", 2, dmp.match_bitap("123456789xx0", "3456789x0", 2)); + assertEquals(L"match_bitap: Overflow.", 2, dmp.match_bitap(L"123456789xx0", L"3456789x0", 2)); - assertEquals("match_bitap: Before start match.", 0, dmp.match_bitap("abcdef", "xxabc", 4)); + assertEquals(L"match_bitap: Before start match.", 0, dmp.match_bitap(L"abcdef", L"xxabc", 4)); - assertEquals("match_bitap: Beyond end match.", 3, dmp.match_bitap("abcdef", "defyy", 4)); + assertEquals(L"match_bitap: Beyond end match.", 3, dmp.match_bitap(L"abcdef", L"defyy", 4)); - assertEquals("match_bitap: Oversized pattern.", 0, dmp.match_bitap("abcdef", "xabcdefy", 0)); + assertEquals(L"match_bitap: Oversized pattern.", 0, dmp.match_bitap(L"abcdef", L"xabcdefy", 0)); - dmp.Match_Threshold = 0.4f; - assertEquals("match_bitap: Threshold #1.", 4, dmp.match_bitap("abcdefghijk", "efxyhi", 1)); + dmp.Match_Threshold = 0.4f; + assertEquals(L"match_bitap: Threshold #1.", 4, dmp.match_bitap(L"abcdefghijk", L"efxyhi", 1)); - dmp.Match_Threshold = 0.3f; - 
assertEquals("match_bitap: Threshold #2.", -1, dmp.match_bitap("abcdefghijk", "efxyhi", 1)); + dmp.Match_Threshold = 0.3f; + assertEquals(L"match_bitap: Threshold #2.", -1, dmp.match_bitap(L"abcdefghijk", L"efxyhi", 1)); - dmp.Match_Threshold = 0.0f; - assertEquals("match_bitap: Threshold #3.", 1, dmp.match_bitap("abcdefghijk", "bcdef", 1)); + dmp.Match_Threshold = 0.0f; + assertEquals(L"match_bitap: Threshold #3.", 1, dmp.match_bitap(L"abcdefghijk", L"bcdef", 1)); - dmp.Match_Threshold = 0.5f; - assertEquals("match_bitap: Multiple select #1.", 0, dmp.match_bitap("abcdexyzabcde", "abccde", 3)); + dmp.Match_Threshold = 0.5f; + assertEquals(L"match_bitap: Multiple select #1.", 0, dmp.match_bitap(L"abcdexyzabcde", L"abccde", 3)); - assertEquals("match_bitap: Multiple select #2.", 8, dmp.match_bitap("abcdexyzabcde", "abccde", 5)); + assertEquals(L"match_bitap: Multiple select #2.", 8, dmp.match_bitap(L"abcdexyzabcde", L"abccde", 5)); - dmp.Match_Distance = 10; // Strict location. - assertEquals("match_bitap: Distance test #1.", -1, dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24)); + dmp.Match_Distance = 10; // Strict location. + assertEquals(L"match_bitap: Distance test #1.", -1, dmp.match_bitap(L"abcdefghijklmnopqrstuvwxyz", L"abcdefg", 24)); - assertEquals("match_bitap: Distance test #2.", 0, dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdxxefg", 1)); + assertEquals(L"match_bitap: Distance test #2.", 0, dmp.match_bitap(L"abcdefghijklmnopqrstuvwxyz", L"abcdxxefg", 1)); - dmp.Match_Distance = 1000; // Loose location. - assertEquals("match_bitap: Distance test #3.", 0, dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24)); + dmp.Match_Distance = 1000; // Loose location. + assertEquals(L"match_bitap: Distance test #3.", 0, dmp.match_bitap(L"abcdefghijklmnopqrstuvwxyz", L"abcdefg", 24)); } void diff_match_patch_test::testMatchMain() { - // Full match. 
- assertEquals("match_main: Equality.", 0, dmp.match_main("abcdef", "abcdef", 1000)); + // Full match. + assertEquals(L"match_main: Equality.", 0, dmp.match_main(L"abcdef", L"abcdef", 1000)); - assertEquals("match_main: Null text.", -1, dmp.match_main("", "abcdef", 1)); + assertEquals(L"match_main: Null text.", -1, dmp.match_main(L"", L"abcdef", 1)); - assertEquals("match_main: Null pattern.", 3, dmp.match_main("abcdef", "", 3)); + assertEquals(L"match_main: Null pattern.", 3, dmp.match_main(L"abcdef", L"", 3)); - assertEquals("match_main: Exact match.", 3, dmp.match_main("abcdef", "de", 3)); + assertEquals(L"match_main: Exact match.", 3, dmp.match_main(L"abcdef", L"de", 3)); - dmp.Match_Threshold = 0.7f; - assertEquals("match_main: Complex match.", 4, dmp.match_main("I am the very model of a modern major general.", " that berry ", 5)); - dmp.Match_Threshold = 0.5f; + dmp.Match_Threshold = 0.7f; + assertEquals(L"match_main: Complex match.", 4, dmp.match_main(L"I am the very model of a modern major general.", L" that berry ", 5)); + dmp.Match_Threshold = 0.5f; - // Test null inputs. - try { - dmp.match_main(NULL, NULL, 0); - assertFalse("match_main: Null inputs.", true); - } catch (const char* ex) { - // Exception expected. - } + // Test null inputs. + try { + dmp.match_main(NULL, NULL, 0); + assertFalse(L"match_main: Null inputs.", true); + } catch (const char* ex) { + // Exception expected. + } } @@ -682,516 +683,516 @@ void diff_match_patch_test::testMatchMain() { void diff_match_patch_test::testPatchObj() { - // Patch Object. - Patch p; - p.start1 = 20; - p.start2 = 21; - p.length1 = 18; - p.length2 = 17; - p.diffs = diffList(Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), Diff(EQUAL, " over "), Diff(DELETE, "the"), Diff(INSERT, "a"), Diff(EQUAL, "\nlaz")); - QString strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; - assertEquals("Patch: toString.", strp, p.toString()); + // Patch Object. 
+ Patch p; + p.start1 = 20; + p.start2 = 21; + p.length1 = 18; + p.length2 = 17; + p.diffs = diffList(Diff(EQUAL, L"jump"), Diff(DELETE, L"s"), Diff(INSERT, L"ed"), Diff(EQUAL, L" over "), Diff(DELETE, L"the"), Diff(INSERT, L"a"), Diff(EQUAL, L"\nlaz")); + std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals(L"Patch: toString.", strp, p.toString()); } void diff_match_patch_test::testPatchFromText() { - assertTrue("patch_fromText: #0.", dmp.patch_fromText("").isEmpty()); + assertTrue(L"patch_fromText: #0.", dmp.patch_fromText(L"").size() == 0); - QString strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; - assertEquals("patch_fromText: #1.", strp, dmp.patch_fromText(strp).value(0).toString()); + std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals(L"patch_fromText: #1.", strp, dmp.patch_fromText(strp)[0].toString()); - assertEquals("patch_fromText: #2.", "@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText("@@ -1 +1 @@\n-a\n+b\n").value(0).toString()); + assertEquals(L"patch_fromText: #2.", L"@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText(L"@@ -1 +1 @@\n-a\n+b\n")[0].toString()); - assertEquals("patch_fromText: #3.", "@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText("@@ -1,3 +0,0 @@\n-abc\n").value(0).toString()); + assertEquals(L"patch_fromText: #3.", L"@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText(L"@@ -1,3 +0,0 @@\n-abc\n")[0].toString()); - assertEquals("patch_fromText: #4.", "@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText("@@ -0,0 +1,3 @@\n+abc\n").value(0).toString()); + assertEquals(L"patch_fromText: #4.", L"@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText(L"@@ -0,0 +1,3 @@\n+abc\n")[0].toString()); - // Generates error. - try { - dmp.patch_fromText("Bad\nPatch\n"); - assertFalse("patch_fromText: #5.", true); - } catch (QString ex) { - // Exception expected. - } + // Generates error. 
+ try { + dmp.patch_fromText(L"Bad\nPatch\n"); + assertFalse(L"patch_fromText: #5.", true); + } catch (std::wstring ex) { + // Exception expected. + } } void diff_match_patch_test::testPatchToText() { - QString strp = "@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; - QList patches; - patches = dmp.patch_fromText(strp); - assertEquals("patch_toText: Single", strp, dmp.patch_toText(patches)); - - strp = "@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n"; - patches = dmp.patch_fromText(strp); - assertEquals("patch_toText: Dual", strp, dmp.patch_toText(patches)); + std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + std::vector patches; + patches = dmp.patch_fromText(strp); + assertEquals(L"patch_toText: Single", strp, dmp.patch_toText(patches)); + + strp = L"@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n tes\n"; + patches = dmp.patch_fromText(strp); + assertEquals(L"patch_toText: Dual", strp, dmp.patch_toText(patches)); } void diff_match_patch_test::testPatchAddContext() { - dmp.Patch_Margin = 4; - Patch p; - p = dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n").value(0); - dmp.patch_addContext(p, "The quick brown fox jumps over the lazy dog."); - assertEquals("patch_addContext: Simple case.", "@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.toString()); - - p = dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n").value(0); - dmp.patch_addContext(p, "The quick brown fox jumps."); - assertEquals("patch_addContext: Not enough trailing context.", "@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.toString()); - - p = dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n").value(0); - dmp.patch_addContext(p, "The quick brown fox jumps."); - assertEquals("patch_addContext: Not enough leading context.", "@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString()); - - p = dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n").value(0); - 
dmp.patch_addContext(p, "The quick brown fox jumps. The quick brown fox crashes."); - assertEquals("patch_addContext: Ambiguity.", "@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. \n", p.toString()); + dmp.Patch_Margin = 4; + Patch p; + p = dmp.patch_fromText(L"@@ -21,4 +21,10 @@\n-jump\n+somersault\n")[0]; + dmp.patch_addContext(p, L"The quick brown fox jumps over the lazy dog."); + assertEquals(L"patch_addContext: Simple case.", L"@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.toString()); + + p = dmp.patch_fromText(L"@@ -21,4 +21,10 @@\n-jump\n+somersault\n")[0]; + dmp.patch_addContext(p, L"The quick brown fox jumps."); + assertEquals(L"patch_addContext: Not enough trailing context.", L"@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.toString()); + + p = dmp.patch_fromText(L"@@ -3 +3,2 @@\n-e\n+at\n")[0]; + dmp.patch_addContext(p, L"The quick brown fox jumps."); + assertEquals(L"patch_addContext: Not enough leading context.", L"@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString()); + + p = dmp.patch_fromText(L"@@ -3 +3,2 @@\n-e\n+at\n")[0]; + dmp.patch_addContext(p, L"The quick brown fox jumps. The quick brown fox crashes."); + assertEquals(L"patch_addContext: Ambiguity.", L"@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. \n", p.toString()); } void diff_match_patch_test::testPatchMake() { - QList patches; - patches = dmp.patch_make("", ""); - assertEquals("patch_make: Null case", "", dmp.patch_toText(patches)); - - QString text1 = "The quick brown fox jumps over the lazy dog."; - QString text2 = "That quick brown fox jumped over a lazy dog."; - QString expectedPatch = "@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n"; - // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context. 
- patches = dmp.patch_make(text2, text1); - assertEquals("patch_make: Text2+Text1 inputs", expectedPatch, dmp.patch_toText(patches)); - - expectedPatch = "@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; - patches = dmp.patch_make(text1, text2); - assertEquals("patch_make: Text1+Text2 inputs", expectedPatch, dmp.patch_toText(patches)); - - QList diffs = dmp.diff_main(text1, text2, false); - patches = dmp.patch_make(diffs); - assertEquals("patch_make: Diff input", expectedPatch, dmp.patch_toText(patches)); - - patches = dmp.patch_make(text1, diffs); - assertEquals("patch_make: Text1+Diff inputs", expectedPatch, dmp.patch_toText(patches)); - - patches = dmp.patch_make(text1, text2, diffs); - assertEquals("patch_make: Text1+Text2+Diff inputs (deprecated)", expectedPatch, dmp.patch_toText(patches)); - - patches = dmp.patch_make("`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?"); - assertEquals("patch_toText: Character encoding.", "@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", dmp.patch_toText(patches)); - - diffs = diffList(Diff(DELETE, "`1234567890-=[]\\;',./"), Diff(INSERT, "~!@#$%^&*()_+{}|:\"<>?")); - assertEquals("patch_fromText: Character decoding.", diffs, dmp.patch_fromText("@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n").value(0).diffs); - - text1 = ""; - for (int x = 0; x < 100; x++) { - text1 += "abcdef"; - } - text2 = text1 + "123"; - expectedPatch = "@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"; - patches = dmp.patch_make(text1, text2); - assertEquals("patch_make: Long string with repeats.", expectedPatch, dmp.patch_toText(patches)); - - // Test null inputs. - try { - dmp.patch_make(NULL, NULL); - assertFalse("patch_make: Null inputs.", true); - } catch (const char* ex) { - // Exception expected. 
- } + std::vector patches; + patches = dmp.patch_make(L"", L""); + assertEquals(L"patch_make: Null case", L"", dmp.patch_toText(patches)); + + std::wstring text1 = L"The quick brown fox jumps over the lazy dog."; + std::wstring text2 = L"That quick brown fox jumped over a lazy dog."; + std::wstring expectedPatch = L"@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n jump\n-ed\n+s\n over \n-a\n+the\n laz\n"; + // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to rolling context. + patches = dmp.patch_make(text2, text1); + assertEquals(L"patch_make: Text2+Text1 inputs", expectedPatch, dmp.patch_toText(patches)); + + expectedPatch = L"@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + patches = dmp.patch_make(text1, text2); + assertEquals(L"patch_make: Text1+Text2 inputs", expectedPatch, dmp.patch_toText(patches)); + + std::vector diffs = dmp.diff_main(text1, text2, false); + patches = dmp.patch_make(diffs); + assertEquals(L"patch_make: Diff input", expectedPatch, dmp.patch_toText(patches)); + + patches = dmp.patch_make(text1, diffs); + assertEquals(L"patch_make: Text1+Diff inputs", expectedPatch, dmp.patch_toText(patches)); + + patches = dmp.patch_make(text1, text2, diffs); + assertEquals(L"patch_make: Text1+Text2+Diff inputs (deprecated)", expectedPatch, dmp.patch_toText(patches)); + + patches = dmp.patch_make(L"`1234567890-=[]\\;',./", L"~!@#$%^&*()_+{}|:\"<>?"); + assertEquals(L"patch_toText: Character encoding.", L"@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", dmp.patch_toText(patches)); + + diffs = diffList(Diff(DELETE, L"`1234567890-=[]\\;',./"), Diff(INSERT, L"~!@#$%^&*()_+{}|:\"<>?")); + assertEquals(L"patch_fromText: Character decoding.", diffs, dmp.patch_fromText(L"@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n")[0].diffs); + + text1 = L""; + for (int x = 0; x < 100; x++) { + text1 
+= L"abcdef"; + } + text2 = text1 + L"123"; + expectedPatch = L"@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"; + patches = dmp.patch_make(text1, text2); + assertEquals(L"patch_make: Long string with repeats.", expectedPatch, dmp.patch_toText(patches)); + + // Test null inputs. + try { + dmp.patch_make(NULL, NULL); + assertFalse(L"patch_make: Null inputs.", true); + } catch (const char* ex) { + // Exception expected. + } } void diff_match_patch_test::testPatchSplitMax() { - // Assumes that Match_MaxBits is 32. - QList patches; - patches = dmp.patch_make("abcdefghijklmnopqrstuvwxyz01234567890", "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0"); - dmp.patch_splitMax(patches); - assertEquals("patch_splitMax: #1.", "@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", dmp.patch_toText(patches)); - - patches = dmp.patch_make("abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz", "abcdefuvwxyz"); - QString oldToText = dmp.patch_toText(patches); - dmp.patch_splitMax(patches); - assertEquals("patch_splitMax: #2.", oldToText, dmp.patch_toText(patches)); - - patches = dmp.patch_make("1234567890123456789012345678901234567890123456789012345678901234567890", "abc"); - dmp.patch_splitMax(patches); - assertEquals("patch_splitMax: #3.", "@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n", dmp.patch_toText(patches)); - - patches = dmp.patch_make("abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1", "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1"); - dmp.patch_splitMax(patches); - assertEquals("patch_splitMax: #4.", "@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 
+29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", dmp.patch_toText(patches)); + // Assumes that Match_MaxBits is 32. + std::vector patches; + patches = dmp.patch_make(L"abcdefghijklmnopqrstuvwxyz01234567890", L"XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0"); + dmp.patch_splitMax(patches); + assertEquals(L"patch_splitMax: #1.", L"@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", dmp.patch_toText(patches)); + + patches = dmp.patch_make(L"abcdef1234567890123456789012345678901234567890123456789012345678901234567890uvwxyz", L"abcdefuvwxyz"); + std::wstring oldToText = dmp.patch_toText(patches); + dmp.patch_splitMax(patches); + assertEquals(L"patch_splitMax: #2.", oldToText, dmp.patch_toText(patches)); + + patches = dmp.patch_make(L"1234567890123456789012345678901234567890123456789012345678901234567890", L"abc"); + dmp.patch_splitMax(patches); + assertEquals(L"patch_splitMax: #3.", L"@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ -29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ -57,14 +1,3 @@\n-78901234567890\n+abc\n", dmp.patch_toText(patches)); + + patches = dmp.patch_make(L"abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1", L"abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : 0 , t : 1"); + dmp.patch_splitMax(patches); + assertEquals(L"patch_splitMax: #4.", L"@@ -2,32 +2,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n@@ -29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", dmp.patch_toText(patches)); } void diff_match_patch_test::testPatchAddPadding() { - QList patches; - patches = dmp.patch_make("", "test"); - assertEquals("patch_addPadding: Both edges full.", "@@ -0,0 +1,4 @@\n+test\n", dmp.patch_toText(patches)); - dmp.patch_addPadding(patches); - assertEquals("patch_addPadding: Both edges 
full.", "@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", dmp.patch_toText(patches)); - - patches = dmp.patch_make("XY", "XtestY"); - assertEquals("patch_addPadding: Both edges partial.", "@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText(patches)); - dmp.patch_addPadding(patches); - assertEquals("patch_addPadding: Both edges partial.", "@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", dmp.patch_toText(patches)); - - patches = dmp.patch_make("XXXXYYYY", "XXXXtestYYYY"); - assertEquals("patch_addPadding: Both edges none.", "@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText(patches)); - dmp.patch_addPadding(patches); - assertEquals("patch_addPadding: Both edges none.", "@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText(patches)); + std::vector patches; + patches = dmp.patch_make(L"", L"test"); + assertEquals(L"patch_addPadding: Both edges full.", L"@@ -0,0 +1,4 @@\n+test\n", dmp.patch_toText(patches)); + dmp.patch_addPadding(patches); + assertEquals(L"patch_addPadding: Both edges full.", L"@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", dmp.patch_toText(patches)); + + patches = dmp.patch_make(L"XY", L"XtestY"); + assertEquals(L"patch_addPadding: Both edges partial.", L"@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText(patches)); + dmp.patch_addPadding(patches); + assertEquals(L"patch_addPadding: Both edges partial.", L"@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", dmp.patch_toText(patches)); + + patches = dmp.patch_make(L"XXXXYYYY", L"XXXXtestYYYY"); + assertEquals(L"patch_addPadding: Both edges none.", L"@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText(patches)); + dmp.patch_addPadding(patches); + assertEquals(L"patch_addPadding: Both edges none.", L"@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", dmp.patch_toText(patches)); } void diff_match_patch_test::testPatchApply() { - dmp.Match_Distance = 1000; - dmp.Match_Threshold = 0.5f; - dmp.Patch_DeleteThreshold = 0.5f; - QList patches; - patches = 
dmp.patch_make("", ""); - QPair > results = dmp.patch_apply(patches, "Hello world."); - QVector boolArray = results.second; - - QString resultStr = QString("%1\t%2").arg(results.first).arg(boolArray.count()); - assertEquals("patch_apply: Null case.", "Hello world.\t0", resultStr); - - patches = dmp.patch_make("The quick brown fox jumps over the lazy dog.", "That quick brown fox jumped over a lazy dog."); - results = dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog."); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); - assertEquals("patch_apply: Exact match.", "That quick brown fox jumped over a lazy dog.\ttrue\ttrue", resultStr); - - results = dmp.patch_apply(patches, "The quick red rabbit jumps over the tired tiger."); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); - assertEquals("patch_apply: Partial match.", "That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", resultStr); - - results = dmp.patch_apply(patches, "I am the very model of a modern major general."); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); - assertEquals("patch_apply: Failed match.", "I am the very model of a modern major general.\tfalse\tfalse", resultStr); - - patches = dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy"); - results = dmp.patch_apply(patches, "x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y"); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? 
"true" : "false"); - assertEquals("patch_apply: Big delete, small change.", "xabcy\ttrue\ttrue", resultStr); - - patches = dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy"); - results = dmp.patch_apply(patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y"); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); - assertEquals("patch_apply: Big delete, large change 1.", "xabc12345678901234567890---------------++++++++++---------------12345678901234567890y\tfalse\ttrue", resultStr); - - dmp.Patch_DeleteThreshold = 0.6f; - patches = dmp.patch_make("x1234567890123456789012345678901234567890123456789012345678901234567890y", "xabcy"); - results = dmp.patch_apply(patches, "x12345678901234567890---------------++++++++++---------------12345678901234567890y"); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? "true" : "false"); - assertEquals("patch_apply: Big delete, large change 2.", "xabcy\ttrue\ttrue", resultStr); - dmp.Patch_DeleteThreshold = 0.5f; - - dmp.Match_Threshold = 0.0f; - dmp.Match_Distance = 0; - patches = dmp.patch_make("abcdefghijklmnopqrstuvwxyz--------------------1234567890", "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890"); - results = dmp.patch_apply(patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890"); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false") + "\t" + (boolArray[1] ? 
"true" : "false"); - assertEquals("patch_apply: Compensate for failed patch.", "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890\tfalse\ttrue", resultStr); - dmp.Match_Threshold = 0.5f; - dmp.Match_Distance = 1000; - - patches = dmp.patch_make("", "test"); - QString patchStr = dmp.patch_toText(patches); - dmp.patch_apply(patches, ""); - assertEquals("patch_apply: No side effects.", patchStr, dmp.patch_toText(patches)); - - patches = dmp.patch_make("The quick brown fox jumps over the lazy dog.", "Woof"); - patchStr = dmp.patch_toText(patches); - dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog."); - assertEquals("patch_apply: No side effects with major delete.", patchStr, dmp.patch_toText(patches)); - - patches = dmp.patch_make("", "test"); - results = dmp.patch_apply(patches, ""); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false"); - assertEquals("patch_apply: Edge exact match.", "test\ttrue", resultStr); - - patches = dmp.patch_make("XY", "XtestY"); - results = dmp.patch_apply(patches, "XY"); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? "true" : "false"); - assertEquals("patch_apply: Near edge exact match.", "XtestY\ttrue", resultStr); - - patches = dmp.patch_make("y", "y123"); - results = dmp.patch_apply(patches, "x"); - boolArray = results.second; - resultStr = results.first + "\t" + (boolArray[0] ? 
"true" : "false"); - assertEquals("patch_apply: Edge partial match.", "x123\ttrue", resultStr); + dmp.Match_Distance = 1000; + dmp.Match_Threshold = 0.5f; + dmp.Patch_DeleteThreshold = 0.5f; + std::vector patches; + patches = dmp.patch_make(L"", L""); + std::pair > results = dmp.patch_apply(patches, L"Hello world."); + std::vector boolArray = results.second; + + std::wstring resultStr = std::format(L"%ls\t%d", std::get<0>(results).c_str(), boolArray.size()); + assertEquals(L"patch_apply: Null case.", L"Hello world.\t0", resultStr); + + patches = dmp.patch_make(L"The quick brown fox jumps over the lazy dog.", L"That quick brown fox jumped over a lazy dog."); + results = dmp.patch_apply(patches, L"The quick brown fox jumps over the lazy dog."); + boolArray = results.second; + resultStr = results.first + L"\t" + (boolArray[0] ? L"true" : L"false") + L"\t" + (boolArray[1] ? L"true" : L"false"); + assertEquals(L"patch_apply: Exact match.", L"That quick brown fox jumped over a lazy dog.\ttrue\ttrue", resultStr); + + results = dmp.patch_apply(patches, L"The quick red rabbit jumps over the tired tiger."); + boolArray = results.second; + resultStr = results.first + L"\t" + (boolArray[0] ? L"true" : L"false") + L"\t" + (boolArray[1] ? L"true" : L"false"); + assertEquals(L"patch_apply: Partial match.", L"That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", resultStr); + + results = dmp.patch_apply(patches, L"I am the very model of a modern major general."); + boolArray = results.second; + resultStr = results.first + L"\t" + (boolArray[0] ? L"true" : L"false") + L"\t" + (boolArray[1] ? 
L"true" : L"false"); + assertEquals(L"patch_apply: Failed match.", L"I am the very model of a modern major general.\tfalse\tfalse", resultStr); + + patches = dmp.patch_make(L"x1234567890123456789012345678901234567890123456789012345678901234567890y", L"xabcy"); + results = dmp.patch_apply(patches, L"x123456789012345678901234567890-----++++++++++-----123456789012345678901234567890y"); + boolArray = results.second; + resultStr = results.first + L"\t" + (boolArray[0] ? L"true" : L"false") + L"\t" + (boolArray[1] ? L"true" : L"false"); + assertEquals(L"patch_apply: Big delete, small change.", L"xabcy\ttrue\ttrue", resultStr); + + patches = dmp.patch_make(L"x1234567890123456789012345678901234567890123456789012345678901234567890y", L"xabcy"); + results = dmp.patch_apply(patches, L"x12345678901234567890---------------++++++++++---------------12345678901234567890y"); + boolArray = results.second; + resultStr = results.first + L"\t" + (boolArray[0] ? L"true" : L"false") + L"\t" + (boolArray[1] ? L"true" : L"false"); + assertEquals(L"patch_apply: Big delete, large change 1.", L"xabc12345678901234567890---------------++++++++++---------------12345678901234567890y\tfalse\ttrue", resultStr); + + dmp.Patch_DeleteThreshold = 0.6f; + patches = dmp.patch_make(L"x1234567890123456789012345678901234567890123456789012345678901234567890y", L"xabcy"); + results = dmp.patch_apply(patches, L"x12345678901234567890---------------++++++++++---------------12345678901234567890y"); + boolArray = results.second; + resultStr = results.first + L"\t" + (boolArray[0] ? L"true" : L"false") + L"\t" + (boolArray[1] ? 
L"true" : L"false"); + assertEquals(L"patch_apply: Big delete, large change 2.", L"xabcy\ttrue\ttrue", resultStr); + dmp.Patch_DeleteThreshold = 0.5f; + + dmp.Match_Threshold = 0.0f; + dmp.Match_Distance = 0; + patches = dmp.patch_make(L"abcdefghijklmnopqrstuvwxyz--------------------1234567890", L"abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------1234567YYYYYYYYYY890"); + results = dmp.patch_apply(patches, L"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890"); + boolArray = results.second; + resultStr = results.first + L"\t" + (boolArray[0] ? L"true" : L"false") + L"\t" + (boolArray[1] ? L"true" : L"false"); + assertEquals(L"patch_apply: Compensate for failed patch.", L"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567YYYYYYYYYY890\tfalse\ttrue", resultStr); + dmp.Match_Threshold = 0.5f; + dmp.Match_Distance = 1000; + + patches = dmp.patch_make(L"", L"test"); + std::wstring patchStr = dmp.patch_toText(patches); + dmp.patch_apply(patches, L""); + assertEquals(L"patch_apply: No side effects.", patchStr, dmp.patch_toText(patches)); + + patches = dmp.patch_make(L"The quick brown fox jumps over the lazy dog.", L"Woof"); + patchStr = dmp.patch_toText(patches); + dmp.patch_apply(patches, L"The quick brown fox jumps over the lazy dog."); + assertEquals(L"patch_apply: No side effects with major delete.", patchStr, dmp.patch_toText(patches)); + + patches = dmp.patch_make(L"", L"test"); + results = dmp.patch_apply(patches, L""); + boolArray = results.second; + resultStr = results.first + L"\t" + (boolArray[0] ? L"true" : L"false"); + assertEquals(L"patch_apply: Edge exact match.", L"test\ttrue", resultStr); + + patches = dmp.patch_make(L"XY", L"XtestY"); + results = dmp.patch_apply(patches, L"XY"); + boolArray = results.second; + resultStr = results.first + L"\t" + (boolArray[0] ? 
L"true" : L"false"); + assertEquals(L"patch_apply: Near edge exact match.", L"XtestY\ttrue", resultStr); + + patches = dmp.patch_make(L"y", L"y123"); + results = dmp.patch_apply(patches, L"x"); + boolArray = results.second; + resultStr = results.first + L"\t" + (boolArray[0] ? L"true" : L"false"); + assertEquals(L"patch_apply: Edge partial match.", L"x123\ttrue", resultStr); } -void diff_match_patch_test::assertEquals(const QString &strCase, int n1, int n2) { - if (n1 != n2) { - qDebug("%s FAIL\nExpected: %d\nActual: %d", qPrintable(strCase), n1, n2); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertEquals(const std::wstring &strCase, int n1, int n2) { + if (n1 != n2) { + std::debug_print(L"%ls FAIL\nExpected: %d\nActual: %d", qPrintable(strCase), n1, n2); + throw strCase; + } + std::debug_print(L"%ls OK", qPrintable(strCase)); } -void diff_match_patch_test::assertEquals(const QString &strCase, const QString &s1, const QString &s2) { - if (s1 != s2) { - qDebug("%s FAIL\nExpected: %s\nActual: %s", - qPrintable(strCase), qPrintable(s1), qPrintable(s2)); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertEquals(const std::wstring &strCase, const std::wstring &s1, const std::wstring &s2) { + if (s1 != s2) { + std::debug_print(L"%ls FAIL\nExpected: %ls\nActual: %ls", + qPrintable(strCase), qPrintable(s1), qPrintable(s2)); + throw strCase; + } + std::debug_print(L"%ls OK", qPrintable(strCase)); } -void diff_match_patch_test::assertEquals(const QString &strCase, const Diff &d1, const Diff &d2) { - if (d1 != d2) { - qDebug("%s FAIL\nExpected: %s\nActual: %s", qPrintable(strCase), - qPrintable(d1.toString()), qPrintable(d2.toString())); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertEquals(const std::wstring &strCase, const Diff &d1, const Diff &d2) { + if (d1 != d2) { + std::debug_print(L"%ls FAIL\nExpected: %ls\nActual: %ls", 
qPrintable(strCase), + qPrintable(d1.toString()), qPrintable(d2.toString())); + throw strCase; + } + std::debug_print(L"%ls OK", qPrintable(strCase)); } -void diff_match_patch_test::assertEquals(const QString &strCase, const QList &list1, const QList &list2) { - bool fail = false; - if (list1.count() == list2.count()) { - int i = 0; - foreach(Diff d1, list1) { - Diff d2 = list2.value(i); - if (d1 != d2) { +void diff_match_patch_test::assertEquals(const std::wstring &strCase, const std::vector &list1, const std::vector &list2) { + bool fail = false; + if (list1.size() == list2.size()) { + int i = 0; + for(Diff d1: list1) { + Diff d2 = list2[i]; + if (d1 != d2) { + fail = true; + break; + } + i++; + } + } else { fail = true; - break; - } - i++; - } - } else { - fail = true; - } - - if (fail) { - // Build human readable description of both lists. - QString listString1 = "("; - bool first = true; - foreach(Diff d1, list1) { - if (!first) { - listString1 += ", "; - } - listString1 += d1.toString(); - first = false; } - listString1 += ")"; - QString listString2 = "("; - first = true; - foreach(Diff d2, list2) { - if (!first) { - listString2 += ", "; - } - listString2 += d2.toString(); - first = false; + + if (fail) { + // Build human readable description of both lists. 
+ std::wstring listString1 = L"("; + bool first = true; + for(Diff d1: list1) { + if (!first) { + listString1 += L", "; + } + listString1 += d1.toString(); + first = false; + } + listString1 += L")"; + std::wstring listString2 = L"("; + first = true; + for(Diff d2: list2) { + if (!first) { + listString2 += L", "; + } + listString2 += d2.toString(); + first = false; + } + listString2 += L")"; + std::debug_print(L"%ls FAIL\nExpected: %ls\nActual: %ls", + qPrintable(strCase), qPrintable(listString1), qPrintable(listString2)); + throw strCase; } - listString2 += ")"; - qDebug("%s FAIL\nExpected: %s\nActual: %s", - qPrintable(strCase), qPrintable(listString1), qPrintable(listString2)); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); + std::debug_print(L"%ls OK", qPrintable(strCase)); } -void diff_match_patch_test::assertEquals(const QString &strCase, const QList &list1, const QList &list2) { - bool fail = false; - if (list1.count() == list2.count()) { - int i = 0; - foreach(QVariant q1, list1) { - QVariant q2 = list2.value(i); - if (q1 != q2) { +void diff_match_patch_test::assertEquals(const std::wstring &strCase, const std::vector &list1, const std::vector &list2) { + bool fail = false; + if (list1.size() == list2.size()) { + int i = 0; + for(std::dmp_variant q1: list1) { + std::dmp_variant q2 = list2[i]; + if (q1 != q2) { + fail = true; + break; + } + i++; + } + } else { fail = true; - break; - } - i++; - } - } else { - fail = true; - } - - if (fail) { - // Build human readable description of both lists. - QString listString1 = "("; - bool first = true; - foreach(QVariant q1, list1) { - if (!first) { - listString1 += ", "; - } - listString1 += q1.toString(); - first = false; } - listString1 += ")"; - QString listString2 = "("; - first = true; - foreach(QVariant q2, list2) { - if (!first) { - listString2 += ", "; - } - listString2 += q2.toString(); - first = false; + + if (fail) { + // Build human readable description of both lists. 
+ std::wstring listString1 = L"("; + bool first = true; + for(std::dmp_variant q1: list1) { + if (!first) { + listString1 += L", "; + } + listString1 += var_to_string(q1); + first = false; + } + listString1 += L")"; + std::wstring listString2 = L"("; + first = true; + for(std::dmp_variant q2: list2) { + if (!first) { + listString2 += L", "; + } + listString2 += var_to_string(q2); + first = false; + } + listString2 += L")"; + std::debug_print(L"%ls FAIL\nExpected: %ls\nActual: %ls", + qPrintable(strCase), qPrintable(listString1), qPrintable(listString2)); + throw strCase; } - listString2 += ")"; - qDebug("%s FAIL\nExpected: %s\nActual: %s", - qPrintable(strCase), qPrintable(listString1), qPrintable(listString2)); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); + std::debug_print(L"%ls OK", qPrintable(strCase)); } -void diff_match_patch_test::assertEquals(const QString &strCase, const QVariant &var1, const QVariant &var2) { - if (var1 != var2) { - qDebug("%s FAIL\nExpected: %s\nActual: %s", qPrintable(strCase), - qPrintable(var1.toString()), qPrintable(var2.toString())); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertEquals(const std::wstring &strCase, const std::dmp_variant &var1, const std::dmp_variant &var2) { + if (var1 != var2) { + std::debug_print(L"%ls FAIL\nExpected: %ls\nActual: %ls", qPrintable(strCase), + qPrintable(var_to_string(var1)), qPrintable(var_to_string(var2))); + throw strCase; + } + std::debug_print(L"%ls OK", qPrintable(strCase)); } -void diff_match_patch_test::assertEquals(const QString &strCase, const QMap &m1, const QMap &m2) { - QMapIterator i1(m1), i2(m2); +void diff_match_patch_test::assertEquals(const std::wstring &strCase, const std::unordered_map &m1, const std::unordered_map &m2) { + auto i1 = m1.begin(), i2 = m2.begin(); + + while (i1 != m1.end() && i2 != m2.end()) { + if (i1->first != i2->first || i1->second != i2->second) { + std::debug_print(L"%ls FAIL\nExpected: (%c, 
%d)\nActual: (%c, %d)", qPrintable(strCase), + i1->first, i1->second, i2->first, i2->second); + throw strCase; + } - while (i1.hasNext() && i2.hasNext()) { - i1.next(); - i2.next(); - if (i1.key() != i2.key() || i1.value() != i2.value()) { - qDebug("%s FAIL\nExpected: (%c, %d)\nActual: (%c, %d)", qPrintable(strCase), - i1.key().toAscii(), i1.value(), i2.key().toAscii(), i2.value()); - throw strCase; + i1++; + i2++; } - } - - if (i1.hasNext()) { - i1.next(); - qDebug("%s FAIL\nExpected: (%c, %d)\nActual: none", - qPrintable(strCase), i1.key().toAscii(), i1.value()); - throw strCase; - } - if (i2.hasNext()) { - i2.next(); - qDebug("%s FAIL\nExpected: none\nActual: (%c, %d)", - qPrintable(strCase), i2.key().toAscii(), i2.value()); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); + + if (i1 != m1.end()) { + std::debug_print(L"%ls FAIL\nExpected: (%c, %d)\nActual: none", + qPrintable(strCase), i1->first, i1->second); + throw strCase; + } + if (i2 != m2.end()) { + std::debug_print(L"%ls FAIL\nExpected: none\nActual: (%c, %d)", + qPrintable(strCase), i2->first, i2->second); + throw strCase; + } + std::debug_print(L"%ls OK", qPrintable(strCase)); } -void diff_match_patch_test::assertEquals(const QString &strCase, const QStringList &list1, const QStringList &list2) { - if (list1 != list2) { - qDebug("%s FAIL\nExpected: %s\nActual: %s", qPrintable(strCase), - qPrintable(list1.join(",")), qPrintable(list2.join(","))); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertEquals(const std::wstring &strCase, const std::wstring_list &list1, const std::wstring_list &list2) { + if (list1 != list2) { + std::debug_print(L"%ls FAIL\nExpected: %ls\nActual: %ls", qPrintable(strCase), + qPrintable(std::join(list1, L",")), qPrintable(std::join(list2, L","))); + throw strCase; + } + std::debug_print(L"%ls OK", qPrintable(strCase)); } -void diff_match_patch_test::assertTrue(const QString &strCase, bool value) { - if (!value) { - 
qDebug("%s FAIL\nExpected: true\nActual: false", qPrintable(strCase)); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertTrue(const std::wstring &strCase, bool value) { + if (!value) { + std::debug_print(L"%ls FAIL\nExpected: true\nActual: false", qPrintable(strCase)); + throw strCase; + } + std::debug_print(L"%ls OK", qPrintable(strCase)); } -void diff_match_patch_test::assertFalse(const QString &strCase, bool value) { - if (value) { - qDebug("%s FAIL\nExpected: false\nActual: true", qPrintable(strCase)); - throw strCase; - } - qDebug("%s OK", qPrintable(strCase)); +void diff_match_patch_test::assertFalse(const std::wstring &strCase, bool value) { + if (value) { + std::debug_print(L"%ls FAIL\nExpected: false\nActual: true", qPrintable(strCase)); + throw strCase; + } + std::debug_print(L"%ls OK", qPrintable(strCase)); } // Construct the two texts which made up the diff originally. -QStringList diff_match_patch_test::diff_rebuildtexts(QList diffs) { - QStringList text; - text << QString("") << QString(""); - foreach (Diff myDiff, diffs) { - if (myDiff.operation != INSERT) { - text[0] += myDiff.text; - } - if (myDiff.operation != DELETE) { - text[1] += myDiff.text; +std::wstring_list diff_match_patch_test::diff_rebuildtexts(std::vector diffs) { + std::wstring_list text; + text.push_back(std::wstring(L"")); + text.push_back(std::wstring(L"")); + for (Diff myDiff: diffs) { + if (myDiff.operation != INSERT) { + text[0] += myDiff.text; + } + if (myDiff.operation != DELETE) { + text[1] += myDiff.text; + } } - } - return text; + return text; } -void diff_match_patch_test::assertEmpty(const QString &strCase, const QStringList &list) { - if (!list.isEmpty()) { - throw strCase; - } +void diff_match_patch_test::assertEmpty(const std::wstring &strCase, const std::wstring_list &list) { + if (list.size() != 0) { + throw strCase; + } } // Private function for quickly building lists of diffs. 
-QList diff_match_patch_test::diffList(Diff d1, Diff d2, Diff d3, Diff d4, Diff d5, - Diff d6, Diff d7, Diff d8, Diff d9, Diff d10) { - // Diff(INSERT, NULL) is invalid and thus is used as the default argument. - QList listRet; - if (d1.operation == INSERT && d1.text == NULL) { - return listRet; - } - listRet << d1; +std::vector diff_match_patch_test::diffList(Diff d1, Diff d2, Diff d3, Diff d4, Diff d5, + Diff d6, Diff d7, Diff d8, Diff d9, Diff d10) { + // Diff(INSERT, NULL) is invalid and thus is used as the default argument. + std::vector listRet; + if (d1.operation == INSERT && d1.invalid) { + return listRet; + } + listRet.push_back(d1); - if (d2.operation == INSERT && d2.text == NULL) { - return listRet; - } - listRet << d2; + if (d2.operation == INSERT && d2.invalid) { + return listRet; + } + listRet.push_back(d2); - if (d3.operation == INSERT && d3.text == NULL) { - return listRet; - } - listRet << d3; + if (d3.operation == INSERT && d3.invalid) { + return listRet; + } + listRet.push_back(d3); - if (d4.operation == INSERT && d4.text == NULL) { - return listRet; - } - listRet << d4; + if (d4.operation == INSERT && d4.invalid) { + return listRet; + } + listRet.push_back(d4); - if (d5.operation == INSERT && d5.text == NULL) { - return listRet; - } - listRet << d5; + if (d5.operation == INSERT && d5.invalid) { + return listRet; + } + listRet.push_back(d5); - if (d6.operation == INSERT && d6.text == NULL) { - return listRet; - } - listRet << d6; + if (d6.operation == INSERT && d6.invalid) { + return listRet; + } + listRet.push_back(d6); - if (d7.operation == INSERT && d7.text == NULL) { - return listRet; - } - listRet << d7; + if (d7.operation == INSERT && d7.invalid) { + return listRet; + } + listRet.push_back(d7); - if (d8.operation == INSERT && d8.text == NULL) { - return listRet; - } - listRet << d8; + if (d8.operation == INSERT && d8.invalid) { + return listRet; + } + listRet.push_back(d8); - if (d9.operation == INSERT && d9.text == NULL) { - return 
listRet; - } - listRet << d9; + if (d9.operation == INSERT && d9.invalid) { + return listRet; + } + listRet.push_back(d9); - if (d10.operation == INSERT && d10.text == NULL) { - return listRet; - } - listRet << d10; + if (d10.operation == INSERT && d10.invalid) { + return listRet; + } + listRet.push_back(d10); - return listRet; + return listRet; } /* -Compile instructions for MinGW and QT4 on Windows: -qmake -project -qmake -mingw32-make -g++ -o diff_match_patch_test debug\diff_match_patch_test.o debug\diff_match_patch.o \qt4\lib\libQtCore4.a -diff_match_patch_test.exe - -Compile insructions for OS X: -qmake -spec macx-g++ -make -./diff_match_patch + Compile instructions for MinGW and QT4 on Windows: + qmake -project + qmake + mingw32-make + g++ -o diff_match_patch_test debug\diff_match_patch_test.o debug\diff_match_patch.o \qt4\lib\libQtCore4.a + diff_match_patch_test.exe + + Compile insructions for OS X: + qmake -spec macx-g++ + make + ./diff_match_patch */ diff --git a/cpp/diff_match_patch_test.h b/cpp/diff_match_patch_test.h index 9792222..f4bdc51 100644 --- a/cpp/diff_match_patch_test.h +++ b/cpp/diff_match_patch_test.h @@ -20,70 +20,70 @@ #define DIFF_MATCH_PATCH_TEST_H class diff_match_patch_test { - public: - diff_match_patch_test(); - void run_all_tests(); +public: + diff_match_patch_test(); + void run_all_tests(); - // DIFF TEST FUNCTIONS - void testDiffCommonPrefix(); - void testDiffCommonSuffix(); - void testDiffCommonOverlap(); - void testDiffHalfmatch(); - void testDiffLinesToChars(); - void testDiffCharsToLines(); - void testDiffCleanupMerge(); - void testDiffCleanupSemanticLossless(); - void testDiffCleanupSemantic(); - void testDiffCleanupEfficiency(); - void testDiffPrettyHtml(); - void testDiffText(); - void testDiffDelta(); - void testDiffXIndex(); - void testDiffLevenshtein(); - void testDiffBisect(); - void testDiffMain(); + // DIFF TEST FUNCTIONS + void testDiffCommonPrefix(); + void testDiffCommonSuffix(); + void testDiffCommonOverlap(); + 
void testDiffHalfmatch(); + void testDiffLinesToChars(); + void testDiffCharsToLines(); + void testDiffCleanupMerge(); + void testDiffCleanupSemanticLossless(); + void testDiffCleanupSemantic(); + void testDiffCleanupEfficiency(); + void testDiffPrettyHtml(); + void testDiffText(); + void testDiffDelta(); + void testDiffXIndex(); + void testDiffLevenshtein(); + void testDiffBisect(); + void testDiffMain(); - // MATCH TEST FUNCTIONS - void testMatchAlphabet(); - void testMatchBitap(); - void testMatchMain(); + // MATCH TEST FUNCTIONS + void testMatchAlphabet(); + void testMatchBitap(); + void testMatchMain(); - // PATCH TEST FUNCTIONS - void testPatchObj(); - void testPatchFromText(); - void testPatchToText(); - void testPatchAddContext(); - void testPatchMake(); - void testPatchSplitMax(); - void testPatchAddPadding(); - void testPatchApply(); + // PATCH TEST FUNCTIONS + void testPatchObj(); + void testPatchFromText(); + void testPatchToText(); + void testPatchAddContext(); + void testPatchMake(); + void testPatchSplitMax(); + void testPatchAddPadding(); + void testPatchApply(); - private: - diff_match_patch dmp; +private: + diff_match_patch dmp; - // Define equality. 
- void assertEquals(const QString &strCase, int n1, int n2); - void assertEquals(const QString &strCase, const QString &s1, const QString &s2); - void assertEquals(const QString &strCase, const Diff &d1, const Diff &d2); - void assertEquals(const QString &strCase, const QList &list1, const QList &list2); - void assertEquals(const QString &strCase, const QList &list1, const QList &list2); - void assertEquals(const QString &strCase, const QVariant &var1, const QVariant &var2); - void assertEquals(const QString &strCase, const QMap &m1, const QMap &m2); - void assertEquals(const QString &strCase, const QStringList &list1, const QStringList &list2); - void assertTrue(const QString &strCase, bool value); - void assertFalse(const QString &strCase, bool value); - void assertEmpty(const QString &strCase, const QStringList &list); + // Define equality. + void assertEquals(const std::wstring &strCase, int n1, int n2); + void assertEquals(const std::wstring &strCase, const std::wstring &s1, const std::wstring &s2); + void assertEquals(const std::wstring &strCase, const Diff &d1, const Diff &d2); + void assertEquals(const std::wstring &strCase, const std::vector &list1, const std::vector &list2); + void assertEquals(const std::wstring &strCase, const std::vector &list1, const std::vector &list2); + void assertEquals(const std::wstring &strCase, const std::dmp_variant &var1, const std::dmp_variant &var2); + void assertEquals(const std::wstring &strCase, const std::unordered_map &m1, const std::unordered_map &m2); + void assertEquals(const std::wstring &strCase, const std::wstring_list &list1, const std::wstring_list &list2); + void assertTrue(const std::wstring &strCase, bool value); + void assertFalse(const std::wstring &strCase, bool value); + void assertEmpty(const std::wstring &strCase, const std::wstring_list &list); - // Construct the two texts which made up the diff originally. 
- QStringList diff_rebuildtexts(QList diffs); - // Private function for quickly building lists of diffs. - QList diffList( - // Diff(INSERT, NULL) is invalid and thus is used as the default argument. - Diff d1 = Diff(INSERT, NULL), Diff d2 = Diff(INSERT, NULL), - Diff d3 = Diff(INSERT, NULL), Diff d4 = Diff(INSERT, NULL), - Diff d5 = Diff(INSERT, NULL), Diff d6 = Diff(INSERT, NULL), - Diff d7 = Diff(INSERT, NULL), Diff d8 = Diff(INSERT, NULL), - Diff d9 = Diff(INSERT, NULL), Diff d10 = Diff(INSERT, NULL)); + // Construct the two texts which made up the diff originally. + std::wstring_list diff_rebuildtexts(std::vector diffs); + // Private function for quickly building lists of diffs. + std::vector diffList( + // Diff(INSERT, NULL) is invalid and thus is used as the default argument. + Diff d1 = Diff(INSERT, NULL), Diff d2 = Diff(INSERT, NULL), + Diff d3 = Diff(INSERT, NULL), Diff d4 = Diff(INSERT, NULL), + Diff d5 = Diff(INSERT, NULL), Diff d6 = Diff(INSERT, NULL), + Diff d7 = Diff(INSERT, NULL), Diff d8 = Diff(INSERT, NULL), + Diff d9 = Diff(INSERT, NULL), Diff d10 = Diff(INSERT, NULL)); }; #endif // DIFF_MATCH_PATCH_TEST_H diff --git a/cpp/diff_match_patch_util.cpp b/cpp/diff_match_patch_util.cpp new file mode 100644 index 0000000..e17dcb0 --- /dev/null +++ b/cpp/diff_match_patch_util.cpp @@ -0,0 +1,184 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include "diff_match_patch_util.h" + +namespace std { +void replace_all(std::wstring& str, const std::wstring& from, const std::wstring& to) { + if(from.empty()) + return; + size_t start_pos = 0; + while((start_pos = str.find(from, start_pos)) != std::wstring::npos) { + str.replace(start_pos, from.length(), to); + start_pos += to.length(); // In case 'to' contains 'from', like replacing 'x' with 'yx' + } +} + +bool ends_with(const std::wstring & a, const std::wstring& b) { + if (b.length() == 0) return true; + if (a.length() == 0) return false; + 
if (a.length() < b.length()) return false; + + auto r = a.rfind(b); + + if (r == std::wstring::npos) return false; + + return r + b.length() == a.length(); +} + +bool starts_with(const std::wstring & a, const std::wstring& b) { + return a.find(b) == 0; +} + +std::wstring_list split(const std::wstring & s, wchar_t delimiter, bool skip_empty) { + std::wstring_list tokens; + + std::wistringstream f(s); + + std::wstring ss; + + while(std::getline(f, ss, delimiter)) { + if (skip_empty && ss.length() == 0) + continue; + tokens.push_back(ss); + } + + return tokens; +} + +std::wstring format(const wchar_t * f, ...) +{ + va_list args; + va_start (args, f); + size_t len = std::vswprintf(NULL, 0, f, args); + va_end (args); + std::vector vec(len + 1); + va_start (args, f); + std::vswprintf(&vec[0], len + 1, f, args); + va_end (args); + return std::wstring{&vec[0]}; +} + +void debug_print(const wchar_t * f, ...) +{ + va_list args; + va_start (args, f); + size_t len = std::vswprintf(NULL, 0, f, args); + va_end (args); + std::vector vec(len + 1); + va_start (args, f); + std::vswprintf(&vec[0], len + 1, f, args); + va_end (args); + + std::wcerr << std::wstring{&vec[0]}; +} + + +static +std::wstring_convert, wchar_t> wcharconv; + +std::string url_encode(const std::string &value, const std::string & exclude) { + std::ostringstream escaped; + escaped.fill('0'); + escaped << std::hex; + + for (std::string::const_iterator i = value.begin(), n = value.end(); i != n; ++i) { + std::string::value_type c = (*i); + + // Keep alphanumeric and other accepted characters intact + if (isalnum(c) || exclude.find(c) != std::string::npos) { + escaped << c; + continue; + } + + // Any other characters are percent-encoded + escaped << uppercase; + escaped << '%' << setw(2) << int((unsigned char) c); + escaped << nouppercase; + } + + return escaped.str(); +} + +std::wstring url_encode(const std::wstring &value, const std::string & exclude) { + std::string utf8_str = wcharconv.to_bytes(value); + + auto 
result = url_encode(utf8_str, exclude); + + return wcharconv.from_bytes(result); +} + + +static +int get_val(std::string::value_type c) { + if (c >= '0' && c <='9') + return c - '0'; + + if (c >= 'A' && c <= 'F') + return 10 + c - 'A'; + + if (c >= 'a' && c <= 'f') + return 10 + c - 'a'; + + return -1; +} + +std::string url_decode(const std::string &value) { + std::ostringstream escaped; + + for (std::string::const_iterator i = value.begin(), n = value.end(); i != n; ++i) { + std::string::value_type c = (*i); + + + if (c == '%') { + auto v1 = get_val(*(i + 1)); + auto v2 = get_val(*(i + 2)); + + if (v1 < 0 || v2 < 0) + throw std::string("invalid value:") + value; + + escaped << (unsigned char)((v1 << 4) + v2); + i+=2; + continue; + } + + escaped << c; + } + + return escaped.str(); +} + +std::wstring url_decode(const std::wstring &value) { + std::string utf8_str = wcharconv.to_bytes(value); + + auto result = url_decode(utf8_str); + + return wcharconv.from_bytes(result); +} + +std::wstring var_to_string(const dmp_variant & var) { + if (std::holds_alternative(var)) { + return std::get(var); + } + if (std::holds_alternative(var)) { + return join(std::get(var), L","); + } + + return L""; +} + +} diff --git a/cpp/diff_match_patch_util.h b/cpp/diff_match_patch_util.h new file mode 100644 index 0000000..e53bccd --- /dev/null +++ b/cpp/diff_match_patch_util.h @@ -0,0 +1,40 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace std { +using wstring_list = std::vector; +using dmp_variant = std::variant; + +void replace_all(std::wstring& str, const std::wstring& from, const std::wstring& to); +bool ends_with(const std::wstring & a, const std::wstring& b); +bool starts_with(const std::wstring & a, const std::wstring& b); +std::wstring_list split(const std::wstring & s, wchar_t delimiter, bool skip_empty=false); +std::wstring format(const wchar_t * f, ...); +std::string url_encode(const std::string &value, const std::string & exclude="-_.~"); 
+std::wstring url_encode(const std::wstring &value, const std::string & exclude="-_.~"); +std::string url_decode(const std::string &value); +std::wstring url_decode(const std::wstring &value); +void debug_print(const wchar_t * f, ...); +template +std::wstring join(const InputIterator & begin, + const InputIterator & end, + const std::wstring & delimiters) { + std::wstringstream ss1; + std::copy(begin, end, + std::ostream_iterator(ss1, delimiters.c_str())); + + return ss1.str(); +} + +template +std::wstring join(const v_t & v, const std::wstring & delimiters) { + return join(v.begin(), v.end(), delimiters); +} + +std::wstring var_to_string(const dmp_variant & var); +} From fa179e99b24c5937d486c5e656c950003cc2a12c Mon Sep 17 00:00:00 2001 From: stonewell Date: Thu, 25 Apr 2019 23:42:25 -0700 Subject: [PATCH 2/7] fix some tests --- cpp/diff_match_patch.cpp | 37 ++++++++++++++---------- cpp/diff_match_patch_test.cpp | 25 +++++++++-------- cpp/diff_match_patch_util.cpp | 53 ++++++++++++++++++++++++----------- 3 files changed, 72 insertions(+), 43 deletions(-) diff --git a/cpp/diff_match_patch.cpp b/cpp/diff_match_patch.cpp index 141f4c8..286eaa3 100644 --- a/cpp/diff_match_patch.cpp +++ b/cpp/diff_match_patch.cpp @@ -536,29 +536,34 @@ std::vector diff_match_patch::diff_linesToChars(const std::wst std::wstring diff_match_patch::diff_linesToCharsMunge(const std::wstring &text, std::wstring_list &lineArray, std::unordered_map &lineHash) { - int lineStart = 0; - int lineEnd = -1; + std::wstring::size_type lineStart = 0; + std::wstring::size_type lineEnd = 0; std::wstring line; std::wstring chars; + + if (text.length() == 0) + return chars; + // Walk the text, pulling out a substring for each line. // text.split('\n') would would temporarily double our memory footprint. // Modifying text would create many large strings to garbage collect. 
- while (lineEnd < text.length() - 1) { + do { lineEnd = text.find(L'\n', lineStart); - if (lineEnd == -1) { + if (lineEnd == std::wstring::npos) { lineEnd = text.length() - 1; } line = safeMid(text, lineStart, lineEnd + 1 - lineStart); lineStart = lineEnd + 1; if (lineHash.find(line) != lineHash.end()) { - chars += char(static_cast(lineHash[line])); + chars += wchar_t(lineHash[line]); } else { lineArray.push_back(line); lineHash.emplace(line, lineArray.size() - 1); - chars += char(static_cast(lineArray.size() - 1)); + chars += wchar_t(lineArray.size() - 1); } - } + } while (lineEnd < text.length() - 1); + return chars; } @@ -570,7 +575,7 @@ void diff_match_patch::diff_charsToLines(std::vector &diffs, while (i != diffs.end()) { Diff &diff = *i; std::wstring text; - for (int y = 0; y < diff.text.length(); y++) { + for (std::wstring::size_type y = 0; y < diff.text.length(); y++) { text += lineArray.at(static_cast(diff.text[y])); } diff.text = text; @@ -636,7 +641,7 @@ int diff_match_patch::diff_commonOverlap(const std::wstring &text1, int length = 1; while (true) { std::wstring pattern = text1_trunc.substr(text1_trunc.length() - length); - int found = text2_trunc.find(pattern); + auto found = text2_trunc.find(pattern); if (found == std::wstring::npos) { return best; } @@ -698,11 +703,11 @@ std::wstring_list diff_match_patch::diff_halfMatchI(const std::wstring &longtext int i) { // Start with a 1/4 length substring at position i as a seed. 
const std::wstring seed = safeMid(longtext, i, longtext.length() / 4); - int j = -1; + std::wstring::size_type j = 0; std::wstring best_common; std::wstring best_longtext_a, best_longtext_b; std::wstring best_shorttext_a, best_shorttext_b; - while ((j = shorttext.find(seed, j + 1)) != std::wstring::npos) { + while ((j = shorttext.find(seed, j)) != std::wstring::npos) { const int prefixLength = diff_commonPrefix(safeMid(longtext, i), safeMid(shorttext, j)); const int suffixLength = diff_commonSuffix(longtext.substr(0, i), @@ -715,6 +720,8 @@ std::wstring_list diff_match_patch::diff_halfMatchI(const std::wstring &longtext best_shorttext_a = shorttext.substr(0, j - suffixLength); best_shorttext_b = safeMid(shorttext, j + prefixLength); } + + j++; } if (best_common.length() * 2 >= longtext.length()) { std::wstring_list listRet; @@ -1490,13 +1497,13 @@ int diff_match_patch::match_bitap(const std::wstring &text, const std::wstring & // Highest score beyond which we give up. double score_threshold = Match_Threshold; // Is there a nearby exact match? (speedup) - int best_loc = text.find(pattern, loc); - if (best_loc != -1) { + auto best_loc = text.find(pattern, loc); + if (best_loc != std::wstring::npos) { score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), score_threshold); // What about in the other direction? (speedup) best_loc = text.find_last_of(pattern, loc + pattern.length()); - if (best_loc != -1) { + if (best_loc != std::wstring::npos) { score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), score_threshold); } @@ -1563,7 +1570,7 @@ int diff_match_patch::match_bitap(const std::wstring &text, const std::wstring & best_loc = j - 1; if (best_loc > loc) { // When passing loc, don't exceed our current distance from loc. - start = std::max(1, 2 * loc - best_loc); + start = std::max((std::wstring::size_type)1, 2 * loc - best_loc); } else { // Already passed loc, downhill from here on in. 
break; diff --git a/cpp/diff_match_patch_test.cpp b/cpp/diff_match_patch_test.cpp index 41aae8c..2dd59cc 100644 --- a/cpp/diff_match_patch_test.cpp +++ b/cpp/diff_match_patch_test.cpp @@ -23,7 +23,7 @@ #include "diff_match_patch.h" #include "diff_match_patch_test.h" -#define qPrintable(x) (x.c_str()) +#define qPrintable(x) (url_encode(x, " !~*'();/?:@&=+$,#-_.~").c_str()) int main(int argc, char **argv) { diff_match_patch_test dmp_test; @@ -147,8 +147,8 @@ void diff_match_patch_test::testDiffLinesToChars() { tmpVector.push_back(L""); tmpVector.push_back(L"alpha\n"); tmpVector.push_back(L"beta\n"); - tmpVarList.push_back(std::dmp_variant(std::wstring() + wchar_t(1) + wchar_t(2) + wchar_t(1))); //((L"\u0001\u0002\u0001")); - tmpVarList.push_back(std::dmp_variant(std::wstring() + wchar_t(2) + wchar_t(1) + wchar_t(2))); // ((L"\u0002\u0001\u0002")); + tmpVarList.push_back(std::dmp_variant(L"\u0001\u0002\u0001")); //((L"\u0001\u0002\u0001")); + tmpVarList.push_back(std::dmp_variant(L"\u0002\u0001\u0002")); // ((L"\u0002\u0001\u0002")); tmpVarList.push_back(std::dmp_variant(tmpVector)); assertEquals(L"diff_linesToChars:", tmpVarList, dmp.diff_linesToChars(L"alpha\nbeta\nalpha\n", L"beta\nalpha\nbeta\n")); @@ -159,7 +159,7 @@ void diff_match_patch_test::testDiffLinesToChars() { tmpVector.push_back(L"beta\r\n"); tmpVector.push_back(L"\r\n"); tmpVarList.push_back(std::dmp_variant(std::wstring(L""))); - tmpVarList.push_back(std::dmp_variant(std::wstring() + wchar_t(1) + wchar_t(2) + wchar_t(3) + wchar_t(3))); // ((L"\u0001\u0002\u0003\u0003")); + tmpVarList.push_back(std::dmp_variant(L"\u0001\u0002\u0003\u0003")); // ((L"\u0001\u0002\u0003\u0003")); tmpVarList.push_back(std::dmp_variant(tmpVector)); assertEquals(L"diff_linesToChars:", tmpVarList, dmp.diff_linesToChars(L"", L"alpha\r\nbeta\r\n\r\n\r\n")); @@ -168,8 +168,8 @@ void diff_match_patch_test::testDiffLinesToChars() { tmpVector.push_back(L""); tmpVector.push_back(L"a"); tmpVector.push_back(L"b"); - 
tmpVarList.push_back(std::dmp_variant(std::wstring() + wchar_t(1))); // ((L"\u0001"))); - tmpVarList.push_back(std::dmp_variant(std::wstring() + wchar_t(2))); // ((L"\u0002")); + tmpVarList.push_back(std::dmp_variant(L"\u0001")); // ((L"\u0001"))); + tmpVarList.push_back(std::dmp_variant(L"\u0002")); // ((L"\u0002")); tmpVarList.push_back(std::dmp_variant(tmpVector)); assertEquals(L"diff_linesToChars:", tmpVarList, dmp.diff_linesToChars(L"a", L"b")); @@ -186,11 +186,11 @@ void diff_match_patch_test::testDiffLinesToChars() { } assertEquals(L"diff_linesToChars: More than 256 (setup).", n, tmpVector.size()); assertEquals(L"diff_linesToChars: More than 256 (setup).", n, chars.length()); - tmpVector.push_back(L""); + tmpVector.insert(tmpVector.begin(), L""); tmpVarList.push_back(std::dmp_variant(chars)); tmpVarList.push_back(std::dmp_variant(std::wstring(L""))); tmpVarList.push_back(std::dmp_variant(tmpVector)); - assertEquals(L"diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars(lines, L"")); + assertEquals(L"1. diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars(lines, L"")); } void diff_match_patch_test::testDiffCharsToLines() { @@ -201,8 +201,8 @@ void diff_match_patch_test::testDiffCharsToLines() { // Convert chars up to lines. 
std::vector diffs; - diffs.push_back(Diff(EQUAL, std::wstring() + wchar_t(1) + wchar_t(2) + wchar_t(1))); // (L"\u0001\u0002\u0001"); - diffs.push_back(Diff(INSERT, std::wstring() + wchar_t(2) + wchar_t(1) + wchar_t(2))); // (L"\u0002\u0001\u0002"); + diffs.push_back(Diff(EQUAL, L"\u0001\u0002\u0001")); // (L"\u0001\u0002\u0001"); + diffs.push_back(Diff(INSERT, L"\u0002\u0001\u0002")); // (L"\u0002\u0001\u0002"); std::wstring_list tmpVector; tmpVector.push_back(L""); tmpVector.push_back(L"alpha\n"); @@ -223,10 +223,10 @@ void diff_match_patch_test::testDiffCharsToLines() { } assertEquals(L"diff_linesToChars: More than 256 (setup).", n, tmpVector.size()); assertEquals(L"diff_linesToChars: More than 256 (setup).", n, chars.length()); - tmpVector.push_back(L""); + tmpVector.insert(tmpVector.begin(), L""); diffs = diffList(Diff(DELETE, chars)); dmp.diff_charsToLines(diffs, tmpVector); - assertEquals(L"diff_charsToLines: More than 256.", diffList(Diff(DELETE, lines)), diffs); + assertEquals(L"2. diff_charsToLines: More than 256.", diffList(Diff(DELETE, lines)), diffs); } void diff_match_patch_test::testDiffCleanupMerge() { @@ -1001,6 +1001,7 @@ void diff_match_patch_test::assertEquals(const std::wstring &strCase, const std: for(std::dmp_variant q1: list1) { std::dmp_variant q2 = list2[i]; if (q1 != q2) { + std::debug_print(L"variable %d not equal, (%ls)_____________ (%ls)", i, qPrintable(var_to_string(q1)), qPrintable(var_to_string(q2))); fail = true; break; } diff --git a/cpp/diff_match_patch_util.cpp b/cpp/diff_match_patch_util.cpp index e17dcb0..88fd563 100644 --- a/cpp/diff_match_patch_util.cpp +++ b/cpp/diff_match_patch_util.cpp @@ -59,28 +59,49 @@ std::wstring_list split(const std::wstring & s, wchar_t delimiter, bool skip_emp std::wstring format(const wchar_t * f, ...) 
{ va_list args; - va_start (args, f); - size_t len = std::vswprintf(NULL, 0, f, args); - va_end (args); - std::vector vec(len + 1); - va_start (args, f); - std::vswprintf(&vec[0], len + 1, f, args); - va_end (args); + + size_t len = 4096; + int result = -1; + std::vector vec{}; + + do { + vec.resize(len); + va_start (args, f); + result = std::vswprintf(&vec[0], len - 1, f, args); + + if (result > 0) { + vec[result] = 0; + } else { + len *= 2; + } + va_end (args); + } while(result < 0); + return std::wstring{&vec[0]}; } void debug_print(const wchar_t * f, ...) { va_list args; - va_start (args, f); - size_t len = std::vswprintf(NULL, 0, f, args); - va_end (args); - std::vector vec(len + 1); - va_start (args, f); - std::vswprintf(&vec[0], len + 1, f, args); - va_end (args); - - std::wcerr << std::wstring{&vec[0]}; + + size_t len = 4096; + int result = -1; + std::vector vec{}; + + do { + vec.resize(len); + va_start (args, f); + result = std::vswprintf(&vec[0], len - 1, f, args); + + if (result > 0) { + vec[result] = 0; + } else { + len *= 2; + } + va_end (args); + } while(result < 0); + + std::wcerr << std::wstring{&vec[0]} << std::endl; } From e9fb1c454ee45850e054c0cab57fcd54790dbef9 Mon Sep 17 00:00:00 2001 From: stonewell Date: Sat, 27 Apr 2019 09:59:22 -0700 Subject: [PATCH 3/7] fix cleanupMerge --- cpp/diff_match_patch.cpp | 54 +++++++++++++++++------------------ cpp/diff_match_patch_test.cpp | 4 +-- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/cpp/diff_match_patch.cpp b/cpp/diff_match_patch.cpp index 286eaa3..1852d5a 100644 --- a/cpp/diff_match_patch.cpp +++ b/cpp/diff_match_patch.cpp @@ -353,7 +353,7 @@ std::vector diff_match_patch::diff_lineMode(std::wstring text1, std::wstri // Upon reaching an equality, check for prior redundancies. if (count_delete >= 1 && count_insert >= 1) { // Delete the offending records and add the merged ones. 
- std::prev(pointer); + pointer = std::prev(pointer); for (int j = 0; j < count_delete + count_insert; j++) { diffs.erase(pointer - 1); } @@ -889,8 +889,12 @@ void diff_match_patch::diff_cleanupSemantic(std::vector &diffs) { namespace std { template -T * safe_next_element(std::vector v, typename std::vector::iterator & it) { - if (it != std::prev(v.end())) return &(*std::next(it)); +T * safe_next_element(const std::vector & v, typename std::vector::iterator & it) { + if (it != v.end()) { + T * p = &(*it); + it++; + return p; + } return nullptr; } @@ -955,10 +959,8 @@ void diff_match_patch::diff_cleanupSemanticLossless(std::vector &diffs) { prevDiff->text = bestEquality1; } else { std::advance(pointer, -3); - auto v = pointer + 1; //this diff diffs.erase(pointer); - pointer = v; //to this diff - std::next(pointer);//pass next diff + pointer = std::next(pointer);//pass next diff } thisDiff->text = bestEdit; if (!bestEquality2.empty()) { @@ -1084,16 +1086,15 @@ void diff_match_patch::diff_cleanupEfficiency(std::vector &diffs) { // printf("Splitting: '%s'\n", qPrintable(lastequality)); // Walk back to offending equality. while (*thisDiff != equalities.front()) { - thisDiff = &(*std::prev(pointer)); + pointer = std::prev(pointer); + thisDiff = &(*pointer); } - std::next(pointer); // Replace equality with a delete. *pointer = Diff(DELETE, lastequality); // Insert a corresponding an insert. diffs.insert(pointer, Diff(INSERT, lastequality)); thisDiff = &(*std::prev(pointer)); - std::next(pointer); equalities.pop_front(); // Throw away the equality we just deleted. lastequality = std::wstring(); @@ -1159,26 +1160,22 @@ void diff_match_patch::diff_cleanupMerge(std::vector &diffs) { if (count_delete + count_insert > 1) { bool both_types = count_delete != 0 && count_insert != 0; // Delete the offending records. - std::prev(pointer); // Reverse direction. 
- while (count_delete-- > 0) { - diffs.erase(pointer - 1); - } - while (count_insert-- > 0) { - diffs.erase(pointer - 1); - } + pointer = std::prev(pointer);//go back to thisDiff + pointer = diffs.erase(pointer - count_delete - count_insert, pointer); + if (both_types) { // Factor out any common prefixies. commonlength = diff_commonPrefix(text_insert, text_delete); if (commonlength != 0) { if (pointer != diffs.begin()) { - thisDiff = &(*std::prev(pointer)); + thisDiff = &(*(pointer - 1)); if (thisDiff->operation != EQUAL) { throw "Previous diff should have been an equality."; } thisDiff->text += text_insert.substr(0, commonlength); - std::next(pointer); } else { diffs.insert(pointer, Diff(EQUAL, text_insert.substr(0, commonlength))); + pointer = std::next(pointer); } text_insert = safeMid(text_insert, commonlength); text_delete = safeMid(text_delete, commonlength); @@ -1186,32 +1183,32 @@ void diff_match_patch::diff_cleanupMerge(std::vector &diffs) { // Factor out any common suffixies. commonlength = diff_commonSuffix(text_insert, text_delete); if (commonlength != 0) { - thisDiff = &(*std::next(pointer)); + thisDiff = &(*pointer); thisDiff->text = safeMid(text_insert, text_insert.length() - commonlength) + thisDiff->text; text_insert = text_insert.substr(0, text_insert.length() - commonlength); text_delete = text_delete.substr(0, text_delete.length() - commonlength); - std::prev(pointer); } } // Insert the merged records. if (!text_delete.empty()) { diffs.insert(pointer, Diff(DELETE, text_delete)); + pointer = std::next(pointer); } if (!text_insert.empty()) { diffs.insert(pointer, Diff(INSERT, text_insert)); + pointer = std::next(pointer); } // Step forward to the equality. thisDiff = std::safe_next_element(diffs, pointer); - } else if (prevEqual != NULL) { // Merge this equality with the previous one. 
prevEqual->text += thisDiff->text; - diffs.erase(pointer++); - thisDiff = &(*std::prev(pointer)); - std::next(pointer); // Forward direction + pointer = std::prev(pointer);//move back to thisDiff + pointer = diffs.erase(pointer); + thisDiff = prevEqual; } count_insert = 0; count_delete = 0; @@ -1222,6 +1219,7 @@ void diff_match_patch::diff_cleanupMerge(std::vector &diffs) { } thisDiff = std::safe_next_element(diffs, pointer); } + if (diffs.back().text.empty()) { diffs.erase(diffs.end() - 1); } @@ -1251,7 +1249,8 @@ void diff_match_patch::diff_cleanupMerge(std::vector &diffs) { - prevDiff->text.length()); nextDiff->text = prevDiff->text + nextDiff->text; std::advance(pointer, -3); - diffs.erase(pointer++); // Delete prevDiff. + pointer = diffs.erase(pointer); // Delete prevDiff. + pointer = std::next(pointer);//Walk past thisDiff thisDiff = std::safe_next_element(diffs, pointer); nextDiff = std::safe_next_element(diffs, pointer); changes = true; @@ -1260,8 +1259,9 @@ void diff_match_patch::diff_cleanupMerge(std::vector &diffs) { prevDiff->text += nextDiff->text; thisDiff->text = safeMid(thisDiff->text, nextDiff->text.length()) + nextDiff->text; - diffs.erase(pointer++); // Delete nextDiff. - nextDiff = (pointer != diffs.end()) ? &(*pointer) : NULL; + pointer = std::prev(pointer);//back to next diff + pointer = diffs.erase(pointer); // Delete nextDiff. + nextDiff = std::safe_next_element(diffs, pointer); changes = true; } } diff --git a/cpp/diff_match_patch_test.cpp b/cpp/diff_match_patch_test.cpp index 2dd59cc..02aac9a 100644 --- a/cpp/diff_match_patch_test.cpp +++ b/cpp/diff_match_patch_test.cpp @@ -190,7 +190,7 @@ void diff_match_patch_test::testDiffLinesToChars() { tmpVarList.push_back(std::dmp_variant(chars)); tmpVarList.push_back(std::dmp_variant(std::wstring(L""))); tmpVarList.push_back(std::dmp_variant(tmpVector)); - assertEquals(L"1. 
diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars(lines, L"")); + assertEquals(L"diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars(lines, L"")); } void diff_match_patch_test::testDiffCharsToLines() { @@ -226,7 +226,7 @@ void diff_match_patch_test::testDiffCharsToLines() { tmpVector.insert(tmpVector.begin(), L""); diffs = diffList(Diff(DELETE, chars)); dmp.diff_charsToLines(diffs, tmpVector); - assertEquals(L"2. diff_charsToLines: More than 256.", diffList(Diff(DELETE, lines)), diffs); + assertEquals(L"diff_charsToLines: More than 256.", diffList(Diff(DELETE, lines)), diffs); } void diff_match_patch_test::testDiffCleanupMerge() { From 91276b005fcd1615d36845ddd38e3dcd1f32a317 Mon Sep 17 00:00:00 2001 From: stonewell Date: Sat, 27 Apr 2019 18:26:34 -0700 Subject: [PATCH 4/7] change most of vector -> list, since we need keep reference same when container insert/erase --- cpp/diff_match_patch.cpp | 224 ++++++++++++++++++++-------------- cpp/diff_match_patch.h | 66 +++++----- cpp/diff_match_patch_test.cpp | 146 +++++++++++----------- cpp/diff_match_patch_test.h | 8 +- cpp/diff_match_patch_util.cpp | 2 +- cpp/diff_match_patch_util.h | 3 +- 6 files changed, 245 insertions(+), 204 deletions(-) diff --git a/cpp/diff_match_patch.cpp b/cpp/diff_match_patch.cpp index 1852d5a..b4ef4b4 100644 --- a/cpp/diff_match_patch.cpp +++ b/cpp/diff_match_patch.cpp @@ -183,12 +183,12 @@ diff_match_patch::diff_match_patch() : } -std::vector diff_match_patch::diff_main(const std::wstring &text1, +std::list diff_match_patch::diff_main(const std::wstring &text1, const std::wstring &text2) { return diff_main(text1, text2, true); } -std::vector diff_match_patch::diff_main(const std::wstring &text1, +std::list diff_match_patch::diff_main(const std::wstring &text1, const std::wstring &text2, bool checklines) { // Set a deadline by which time the diff must be complete. 
clock_t deadline; @@ -200,7 +200,7 @@ std::vector diff_match_patch::diff_main(const std::wstring &text1, return diff_main(text1, text2, checklines, deadline); } -std::vector diff_match_patch::diff_main(const std::wstring &text1, +std::list diff_match_patch::diff_main(const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline) { // Check for null inputs. if (text1.empty() || text2.empty()) { @@ -208,7 +208,7 @@ std::vector diff_match_patch::diff_main(const std::wstring &text1, } // Check for equality (speedup). - std::vector diffs; + std::list diffs; if (text1 == text2) { if (!text1.empty()) { diffs.push_back(Diff(EQUAL, text1)); @@ -245,9 +245,9 @@ std::vector diff_match_patch::diff_main(const std::wstring &text1, } -std::vector diff_match_patch::diff_compute(std::wstring text1, std::wstring text2, +std::list diff_match_patch::diff_compute(std::wstring text1, std::wstring text2, bool checklines, clock_t deadline) { - std::vector diffs; + std::list diffs; if (text1.empty()) { // Just add some text (speedup). @@ -288,15 +288,16 @@ std::vector diff_match_patch::diff_compute(std::wstring text1, std::wstrin const std::wstring_list hm = diff_halfMatch(text1, text2); if (hm.size() > 0) { // A half-match was found, sort out the return data. - const std::wstring text1_a = hm[0]; - const std::wstring text1_b = hm[1]; - const std::wstring text2_a = hm[2]; - const std::wstring text2_b = hm[3]; - const std::wstring mid_common = hm[4]; + auto it = hm.begin(); + const std::wstring text1_a = *it++; + const std::wstring text1_b = *it++; + const std::wstring text2_a = *it++; + const std::wstring text2_b = *it++; + const std::wstring mid_common = *it++; // Send both pairs off for separate processing. 
- const std::vector diffs_a = diff_main(text1_a, text2_a, + const std::list diffs_a = diff_main(text1_a, text2_a, checklines, deadline); - const std::vector diffs_b = diff_main(text1_b, text2_b, + const std::list diffs_b = diff_main(text1_b, text2_b, checklines, deadline); // Merge the results. diffs = diffs_a; @@ -314,15 +315,16 @@ std::vector diff_match_patch::diff_compute(std::wstring text1, std::wstrin } -std::vector diff_match_patch::diff_lineMode(std::wstring text1, std::wstring text2, +std::list diff_match_patch::diff_lineMode(std::wstring text1, std::wstring text2, clock_t deadline) { // Scan the text on a line-by-line basis first. - const std::vector b = diff_linesToChars(text1, text2); - text1 = std::get(b[0]); - text2 = std::get(b[1]); - std::wstring_list linearray = std::get(b[2]); + const std::list b = diff_linesToChars(text1, text2); + auto it = b.begin(); + text1 = std::get(*it++); + text2 = std::get(*it++); + std::wstring_list linearray = std::get(*it++); - std::vector diffs = diff_main(text1, text2, false, deadline); + std::list diffs = diff_main(text1, text2, false, deadline); // Convert the diff back to original text. diff_charsToLines(diffs, linearray); @@ -337,7 +339,7 @@ std::vector diff_match_patch::diff_lineMode(std::wstring text1, std::wstri std::wstring text_delete = L""; std::wstring text_insert = L""; - std::vector::iterator pointer = diffs.begin(); + std::list::iterator pointer = diffs.begin(); while (pointer != diffs.end()) { Diff *thisDiff = &(*pointer); switch (thisDiff->operation) { @@ -354,12 +356,14 @@ std::vector diff_match_patch::diff_lineMode(std::wstring text1, std::wstri if (count_delete >= 1 && count_insert >= 1) { // Delete the offending records and add the merged ones. 
pointer = std::prev(pointer); - for (int j = 0; j < count_delete + count_insert; j++) { - diffs.erase(pointer - 1); - } + + auto p2 = pointer; + std::advance(p2, -1 * (count_delete + count_insert)); + + diffs.erase(p2, pointer); for(Diff newDiff: diff_main(text_delete, text_insert, false, deadline)) { - diffs.insert(pointer, newDiff); + pointer = std::next(diffs.insert(pointer, newDiff)); } } count_insert = 0; @@ -371,13 +375,13 @@ std::vector diff_match_patch::diff_lineMode(std::wstring text1, std::wstri pointer++; } - diffs.erase(diffs.end() - 1); + diffs.erase(--diffs.end()); return diffs; } -std::vector diff_match_patch::diff_bisect(const std::wstring &text1, +std::list diff_match_patch::diff_bisect(const std::wstring &text1, const std::wstring &text2, clock_t deadline) { // Cache the text lengths to prevent multiple calls. const int text1_length = text1.length(); @@ -489,13 +493,13 @@ std::vector diff_match_patch::diff_bisect(const std::wstring &text1, delete [] v2; // Diff took too long and hit the deadline or // number of diffs equals number of characters, no commonality at all. - std::vector diffs; + std::list diffs; diffs.push_back(Diff(DELETE, text1)); diffs.push_back(Diff(INSERT, text2)); return diffs; } -std::vector diff_match_patch::diff_bisectSplit(const std::wstring &text1, +std::list diff_match_patch::diff_bisectSplit(const std::wstring &text1, const std::wstring &text2, int x, int y, clock_t deadline) { std::wstring text1a = text1.substr(0, x); std::wstring text2a = text2.substr(0, y); @@ -503,15 +507,15 @@ std::vector diff_match_patch::diff_bisectSplit(const std::wstring &text1, std::wstring text2b = safeMid(text2, y); // Compute both diffs serially. 
- std::vector diffs = diff_main(text1a, text2a, false, deadline); - std::vector diffsb = diff_main(text1b, text2b, false, deadline); + std::list diffs = diff_main(text1a, text2a, false, deadline); + std::list diffsb = diff_main(text1b, text2b, false, deadline); diffs.insert(std::end(diffs), std::begin(diffsb), std::end(diffsb)); return diffs; } -std::vector diff_match_patch::diff_linesToChars(const std::wstring &text1, +std::list diff_match_patch::diff_linesToChars(const std::wstring &text1, const std::wstring &text2) { std::wstring_list lineArray; std::unordered_map lineHash; @@ -525,7 +529,7 @@ std::vector diff_match_patch::diff_linesToChars(const std::wst const std::wstring chars1 = diff_linesToCharsMunge(text1, lineArray, lineHash); const std::wstring chars2 = diff_linesToCharsMunge(text2, lineArray, lineHash); - std::vector listRet; + std::list listRet; listRet.push_back(std::dmp_variant{chars1}); listRet.push_back(std::dmp_variant{chars2}); listRet.push_back(std::dmp_variant{lineArray}); @@ -569,9 +573,9 @@ std::wstring diff_match_patch::diff_linesToCharsMunge(const std::wstring &text, -void diff_match_patch::diff_charsToLines(std::vector &diffs, +void diff_match_patch::diff_charsToLines(std::list &diffs, const std::wstring_list &lineArray) { - std::vector::iterator i = diffs.begin(); + std::list::iterator i = diffs.begin(); while (i != diffs.end()) { Diff &diff = *i; std::wstring text; @@ -737,14 +741,14 @@ std::wstring_list diff_match_patch::diff_halfMatchI(const std::wstring &longtext } -void diff_match_patch::diff_cleanupSemantic(std::vector &diffs) { +void diff_match_patch::diff_cleanupSemantic(std::list &diffs) { if (diffs.empty()) { return; } bool changes = false; std::deque equalities; // Stack of equalities. std::wstring lastequality; // Always equal to equalities.lastElement().text - std::vector::iterator pointer = diffs.begin(); + std::list::iterator pointer = diffs.begin(); // Number of characters that changed prior to the equality. 
int length_insertions1 = 0; int length_deletions1 = 0; @@ -786,7 +790,7 @@ void diff_match_patch::diff_cleanupSemantic(std::vector &diffs) { // Replace equality with a delete. *pointer = Diff(DELETE, lastequality); // Insert a corresponding an insert. - diffs.insert(pointer, Diff(INSERT, lastequality)); + pointer = std::next(diffs.insert(pointer, Diff(INSERT, lastequality))); equalities.pop_front(); // Throw away the equality we just deleted. if (!equalities.empty()) { @@ -851,7 +855,7 @@ void diff_match_patch::diff_cleanupSemantic(std::vector &diffs) { overlap_length1 >= insertion.length() / 2.0) { // Overlap found. Insert an equality and trim the surrounding edits. pointer--; - diffs.insert(pointer, Diff(EQUAL, insertion.substr(0, overlap_length1))); + pointer = std::next(diffs.insert(pointer, Diff(EQUAL, insertion.substr(0, overlap_length1)))); prevDiff->text = deletion.substr(0, deletion.length() - overlap_length1); thisDiff->text = safeMid(insertion, overlap_length1); @@ -864,7 +868,7 @@ void diff_match_patch::diff_cleanupSemantic(std::vector &diffs) { // Reverse overlap found. // Insert an equality and swap and trim the surrounding edits. 
pointer--; - diffs.insert(pointer, Diff(EQUAL, deletion.substr(0, overlap_length2))); + pointer = std::next(diffs.insert(pointer, Diff(EQUAL, deletion.substr(0, overlap_length2)))); prevDiff->operation = INSERT; prevDiff->text = insertion.substr(0, insertion.length() - overlap_length2); @@ -889,7 +893,7 @@ void diff_match_patch::diff_cleanupSemantic(std::vector &diffs) { namespace std { template -T * safe_next_element(const std::vector & v, typename std::vector::iterator & it) { +T * safe_next_element(const std::list & v, typename std::list::iterator & it) { if (it != v.end()) { T * p = &(*it); it++; @@ -901,20 +905,21 @@ T * safe_next_element(const std::vector & v, typename std::vector::iterato } -void diff_match_patch::diff_cleanupSemanticLossless(std::vector &diffs) { +void diff_match_patch::diff_cleanupSemanticLossless(std::list &diffs) { std::wstring equality1, edit, equality2; std::wstring commonString; int commonOffset; int score, bestScore; std::wstring bestEquality1, bestEdit, bestEquality2; // Create a new iterator at the start. - std::vector::iterator pointer = diffs.begin(); + std::list::iterator pointer = diffs.begin(); Diff *prevDiff = std::safe_next_element(diffs, pointer); Diff *thisDiff = std::safe_next_element(diffs, pointer); Diff *nextDiff = std::safe_next_element(diffs, pointer); // Intentionally ignore the first and last element (don't need checking). while (nextDiff != NULL) { + std::cout << "-------" << std::endl; if (prevDiff->operation == EQUAL && nextDiff->operation == EQUAL) { // This is a single edit surrounded by equalities. @@ -925,10 +930,20 @@ void diff_match_patch::diff_cleanupSemanticLossless(std::vector &diffs) { // First, shift the edit as far left as possible. commonOffset = diff_commonSuffix(equality1, edit); if (commonOffset != 0) { + std::wcout << L"-------???" 
<< commonString + << ", [" << equality1 + << "], [" << edit + << "], [" << equality2 << "]" + << std::endl; commonString = safeMid(edit, edit.length() - commonOffset); equality1 = equality1.substr(0, equality1.length() - commonOffset); edit = commonString + edit.substr(0, edit.length() - commonOffset); equality2 = commonString + equality2; + std::wcout << L"-------???" << commonString + << ", [" << equality1 + << "], [" << edit + << "], [" << equality2 << "]" + << std::endl; } // Second, step character by character right, looking for the best fit. @@ -952,21 +967,42 @@ void diff_match_patch::diff_cleanupSemanticLossless(std::vector &diffs) { bestEquality2 = equality2; } } + std::wcout << L"***-------???" + << "[" << equality1 + << "], [" << edit + << "], [" << equality2 << "]" + << "---------[" << bestEquality1 + << "], [" << bestEdit + << "], [" << bestEquality2 << "]" + << std::endl; if (prevDiff->text != bestEquality1) { // We have an improvement, save it back to the diff. + std::wcout << L"++++++++++++++++++" + << nextDiff->text + << ", " << bestEquality2 + << ", " << &diffs.back() + << ", " << nextDiff + << std::endl; if (!bestEquality1.empty()) { prevDiff->text = bestEquality1; } else { std::advance(pointer, -3); - diffs.erase(pointer); - pointer = std::next(pointer);//pass next diff + pointer = diffs.erase(pointer); + std::advance(pointer, 2);//pass next diff } thisDiff->text = bestEdit; if (!bestEquality2.empty()) { nextDiff->text = bestEquality2; + std::wcout << L"++++++++++++++++++" + << nextDiff->text + << ", " << bestEquality2 + << ", " << &diffs.back() + << ", " << nextDiff + << std::endl; } else { - diffs.erase(pointer); // Delete nextDiff. + pointer = std::prev(pointer);//move back to nextDiff + pointer = diffs.erase(pointer); // Delete nextDiff. nextDiff = thisDiff; thisDiff = prevDiff; } @@ -991,6 +1027,7 @@ int diff_match_patch::diff_cleanupSemanticScore(const std::wstring &one, // 'whitespace'. 
Since this function's purpose is largely cosmetic, // the choice has been made to use each language's native features // rather than force total conformity. + std::wsmatch m; char char1 = one[one.length() - 1]; char char2 = two[0]; bool nonAlphaNumeric1 = !std::isalnum(char1); @@ -999,8 +1036,8 @@ int diff_match_patch::diff_cleanupSemanticScore(const std::wstring &one, bool whitespace2 = nonAlphaNumeric2 && std::isspace(char2); bool lineBreak1 = whitespace1 && std::iscntrl(char1); bool lineBreak2 = whitespace2 && std::iscntrl(char2); - bool blankLine1 = lineBreak1 && std::regex_match(one, BLANKLINEEND); - bool blankLine2 = lineBreak2 && std::regex_match(two, BLANKLINESTART); + bool blankLine1 = lineBreak1 && std::regex_search(one, m, BLANKLINEEND); + bool blankLine2 = lineBreak2 && std::regex_search(two, m, BLANKLINESTART); if (blankLine1 || blankLine2) { // Five points for blank lines. @@ -1027,14 +1064,14 @@ std::wregex diff_match_patch::BLANKLINEEND{L"\\n\\r?\\n$"}; std::wregex diff_match_patch::BLANKLINESTART{L"^\\r?\\n\\r?\\n"}; -void diff_match_patch::diff_cleanupEfficiency(std::vector &diffs) { +void diff_match_patch::diff_cleanupEfficiency(std::list &diffs) { if (diffs.empty()) { return; } bool changes = false; std::deque equalities; // Stack of equalities. std::wstring lastequality; // Always equal to equalities.lastElement().text - std::vector::iterator pointer = diffs.begin(); + std::list::iterator pointer = diffs.begin(); // Is there an insertion operation before the last equality. bool pre_ins = false; // Is there a deletion operation before the last equality. @@ -1093,7 +1130,8 @@ void diff_match_patch::diff_cleanupEfficiency(std::vector &diffs) { // Replace equality with a delete. *pointer = Diff(DELETE, lastequality); // Insert a corresponding an insert. 
- diffs.insert(pointer, Diff(INSERT, lastequality)); + pointer = std::next(diffs.insert(pointer, Diff(INSERT, lastequality))); + thisDiff = &(*std::prev(pointer)); equalities.pop_front(); // Throw away the equality we just deleted. @@ -1134,9 +1172,9 @@ void diff_match_patch::diff_cleanupEfficiency(std::vector &diffs) { } -void diff_match_patch::diff_cleanupMerge(std::vector &diffs) { +void diff_match_patch::diff_cleanupMerge(std::list &diffs) { diffs.push_back(Diff(EQUAL, L"")); // Add a dummy entry at the end. - std::vector::iterator pointer = diffs.begin(); + std::list::iterator pointer = diffs.begin(); int count_delete = 0; int count_insert = 0; std::wstring text_delete = L""; @@ -1161,21 +1199,23 @@ void diff_match_patch::diff_cleanupMerge(std::vector &diffs) { bool both_types = count_delete != 0 && count_insert != 0; // Delete the offending records. pointer = std::prev(pointer);//go back to thisDiff - pointer = diffs.erase(pointer - count_delete - count_insert, pointer); + + auto p2 = pointer; + std::advance(p2, - count_delete - count_insert); + pointer = diffs.erase(p2, pointer); if (both_types) { // Factor out any common prefixies. commonlength = diff_commonPrefix(text_insert, text_delete); if (commonlength != 0) { if (pointer != diffs.begin()) { - thisDiff = &(*(pointer - 1)); + thisDiff = &(*(std::prev(pointer))); if (thisDiff->operation != EQUAL) { throw "Previous diff should have been an equality."; } thisDiff->text += text_insert.substr(0, commonlength); } else { - diffs.insert(pointer, Diff(EQUAL, text_insert.substr(0, commonlength))); - pointer = std::next(pointer); + pointer = std::next(diffs.insert(pointer, Diff(EQUAL, text_insert.substr(0, commonlength)))); } text_insert = safeMid(text_insert, commonlength); text_delete = safeMid(text_delete, commonlength); @@ -1194,12 +1234,10 @@ void diff_match_patch::diff_cleanupMerge(std::vector &diffs) { } // Insert the merged records. 
if (!text_delete.empty()) { - diffs.insert(pointer, Diff(DELETE, text_delete)); - pointer = std::next(pointer); + pointer = std::next(diffs.insert(pointer, Diff(DELETE, text_delete))); } if (!text_insert.empty()) { - diffs.insert(pointer, Diff(INSERT, text_insert)); - pointer = std::next(pointer); + pointer = std::next(diffs.insert(pointer, Diff(INSERT, text_insert))); } // Step forward to the equality. thisDiff = std::safe_next_element(diffs, pointer); @@ -1221,7 +1259,7 @@ void diff_match_patch::diff_cleanupMerge(std::vector &diffs) { } if (diffs.back().text.empty()) { - diffs.erase(diffs.end() - 1); + diffs.erase(std::prev(diffs.end())); } /* @@ -1276,7 +1314,7 @@ void diff_match_patch::diff_cleanupMerge(std::vector &diffs) { } -int diff_match_patch::diff_xIndex(const std::vector &diffs, int loc) { +int diff_match_patch::diff_xIndex(const std::list &diffs, int loc) { int chars1 = 0; int chars2 = 0; int last_chars1 = 0; @@ -1308,7 +1346,7 @@ int diff_match_patch::diff_xIndex(const std::vector &diffs, int loc) { } -std::wstring diff_match_patch::diff_prettyHtml(const std::vector &diffs) { +std::wstring diff_match_patch::diff_prettyHtml(const std::list &diffs) { std::wstring html; std::wstring text; for(Diff aDiff : diffs) { @@ -1335,7 +1373,7 @@ std::wstring diff_match_patch::diff_prettyHtml(const std::vector &diffs) { } -std::wstring diff_match_patch::diff_text1(const std::vector &diffs) { +std::wstring diff_match_patch::diff_text1(const std::list &diffs) { std::wstring text; for(Diff aDiff: diffs) { if (aDiff.operation != INSERT) { @@ -1346,7 +1384,7 @@ std::wstring diff_match_patch::diff_text1(const std::vector &diffs) { } -std::wstring diff_match_patch::diff_text2(const std::vector &diffs) { +std::wstring diff_match_patch::diff_text2(const std::list &diffs) { std::wstring text; for(Diff aDiff : diffs) { if (aDiff.operation != DELETE) { @@ -1357,7 +1395,7 @@ std::wstring diff_match_patch::diff_text2(const std::vector &diffs) { } -int 
diff_match_patch::diff_levenshtein(const std::vector &diffs) { +int diff_match_patch::diff_levenshtein(const std::list &diffs) { int levenshtein = 0; int insertions = 0; int deletions = 0; @@ -1382,7 +1420,7 @@ int diff_match_patch::diff_levenshtein(const std::vector &diffs) { } -std::wstring diff_match_patch::diff_toDelta(const std::vector &diffs) { +std::wstring diff_match_patch::diff_toDelta(const std::list &diffs) { std::wstring text; for(Diff aDiff : diffs) { switch (aDiff.operation) { @@ -1410,9 +1448,9 @@ std::wstring diff_match_patch::diff_toDelta(const std::vector &diffs) { } -std::vector diff_match_patch::diff_fromDelta(const std::wstring &text1, +std::list diff_match_patch::diff_fromDelta(const std::wstring &text1, const std::wstring &delta) { - std::vector diffs; + std::list diffs; int pointer = 0; // Cursor in text1 std::wstring_list tokens = std::split(delta, '\t'); for(std::wstring token: tokens) { @@ -1663,7 +1701,7 @@ void diff_match_patch::patch_addContext(Patch &patch, const std::wstring &text) } -std::vector diff_match_patch::patch_make(const std::wstring &text1, +std::list diff_match_patch::patch_make(const std::wstring &text1, const std::wstring &text2) { // Check for null inputs. if (text1.empty() || text2.empty()) { @@ -1671,7 +1709,7 @@ std::vector diff_match_patch::patch_make(const std::wstring &text1, } // No diffs provided, compute our own. - std::vector diffs = diff_main(text1, text2, true); + std::list diffs = diff_main(text1, text2, true); if (diffs.size() > 2) { diff_cleanupSemantic(diffs); diff_cleanupEfficiency(diffs); @@ -1681,16 +1719,16 @@ std::vector diff_match_patch::patch_make(const std::wstring &text1, } -std::vector diff_match_patch::patch_make(const std::vector &diffs) { +std::list diff_match_patch::patch_make(const std::list &diffs) { // No origin string provided, compute our own. 
const std::wstring text1 = diff_text1(diffs); return patch_make(text1, diffs); } -std::vector diff_match_patch::patch_make(const std::wstring &text1, +std::list diff_match_patch::patch_make(const std::wstring &text1, const std::wstring &text2, - const std::vector &diffs) { + const std::list &diffs) { // text2 is entirely unused. return patch_make(text1, diffs); @@ -1698,14 +1736,14 @@ std::vector diff_match_patch::patch_make(const std::wstring &text1, } -std::vector diff_match_patch::patch_make(const std::wstring &text1, - const std::vector &diffs) { +std::list diff_match_patch::patch_make(const std::wstring &text1, + const std::list &diffs) { // Check for null inputs. if (text1.empty()) { throw "Null inputs. (patch_make)"; } - std::vector patches; + std::list patches; if (diffs.empty()) { return patches; // Get rid of the null case. } @@ -1781,8 +1819,8 @@ std::vector diff_match_patch::patch_make(const std::wstring &text1, } -std::vector diff_match_patch::patch_deepCopy(std::vector &patches) { - std::vector patchesCopy; +std::list diff_match_patch::patch_deepCopy(std::list &patches) { + std::list patchesCopy; for(Patch aPatch: patches) { Patch patchCopy = Patch(); for(Diff aDiff: aPatch.diffs) { @@ -1800,14 +1838,14 @@ std::vector diff_match_patch::patch_deepCopy(std::vector &patches) std::pair > diff_match_patch::patch_apply( - std::vector &patches, const std::wstring &sourceText) { + std::list &patches, const std::wstring &sourceText) { std::wstring text = sourceText; // Copy to preserve original. if (patches.empty()) { return std::pair >(text, std::vector(0)); } // Deep copy the patches so that no changes are made to originals. - std::vector patchesCopy = patch_deepCopy(patches); + std::list patchesCopy = patch_deepCopy(patches); std::wstring nullPadding = patch_addPadding(patchesCopy); text = nullPadding + text + nullPadding; @@ -1862,7 +1900,7 @@ std::pair > diff_match_patch::patch_apply( } else { // Imperfect match. 
Run a diff to get a framework of equivalent // indices. - std::vector diffs = diff_main(text1, text2, false); + std::list diffs = diff_main(text1, text2, false); if (text1.length() > Match_MaxBits && diff_levenshtein(diffs) / static_cast (text1.length()) > Patch_DeleteThreshold) { @@ -1901,7 +1939,7 @@ std::pair > diff_match_patch::patch_apply( } -std::wstring diff_match_patch::patch_addPadding(std::vector &patches) { +std::wstring diff_match_patch::patch_addPadding(std::list &patches) { short paddingLength = Patch_Margin; std::wstring nullPadding = L""; for (short x = 1; x <= paddingLength; x++) { @@ -1918,7 +1956,7 @@ std::wstring diff_match_patch::patch_addPadding(std::vector &patches) { // Add some padding on start of first diff. Patch &firstPatch = patches.front(); - std::vector &firstPatchDiffs = firstPatch.diffs; + std::list &firstPatchDiffs = firstPatch.diffs; if (firstPatchDiffs.empty() || firstPatchDiffs.front().operation != EQUAL) { // Add nullPadding equality. firstPatchDiffs.insert(firstPatchDiffs.begin(), Diff(EQUAL, nullPadding)); @@ -1940,7 +1978,7 @@ std::wstring diff_match_patch::patch_addPadding(std::vector &patches) { // Add some padding on end of last diff. Patch &lastPatch = patches.front(); - std::vector &lastPatchDiffs = lastPatch.diffs; + std::list &lastPatchDiffs = lastPatch.diffs; if (lastPatchDiffs.empty() || lastPatchDiffs.back().operation != EQUAL) { // Add nullPadding equality. lastPatchDiffs.push_back(Diff(EQUAL, nullPadding)); @@ -1959,7 +1997,7 @@ std::wstring diff_match_patch::patch_addPadding(std::vector &patches) { } -void diff_match_patch::patch_splitMax(std::vector &patches) { +void diff_match_patch::patch_splitMax(std::list &patches) { short patch_size = Match_MaxBits; std::wstring precontext, postcontext; Patch patch; @@ -1983,9 +2021,7 @@ void diff_match_patch::patch_splitMax(std::vector &patches) { continue; } // Remove the big old patch. 
- auto tmp_pointer = pointer + 1; - patches.erase(pointer); - pointer = tmp_pointer; + pointer = patches.erase(pointer); start1 = bigpatch.start1; start2 = bigpatch.start2; precontext = L""; @@ -2060,7 +2096,7 @@ void diff_match_patch::patch_splitMax(std::vector &patches) { } } if (!empty) { - patches.insert(pointer, patch); + pointer = std::next(patches.insert(pointer, patch)); } } if (pointer != patches.end()) @@ -2071,7 +2107,7 @@ void diff_match_patch::patch_splitMax(std::vector &patches) { } -std::wstring diff_match_patch::patch_toText(const std::vector &patches) { +std::wstring diff_match_patch::patch_toText(const std::list &patches) { std::wstring text; for(Patch aPatch: patches) { text.append(aPatch.toString()); @@ -2080,8 +2116,8 @@ std::wstring diff_match_patch::patch_toText(const std::vector &patches) { } -std::vector diff_match_patch::patch_fromText(const std::wstring &textline) { - std::vector patches; +std::list diff_match_patch::patch_fromText(const std::wstring &textline) { + std::list patches; if (textline.empty()) { return patches; } @@ -2144,7 +2180,7 @@ std::vector diff_match_patch::patch_fromText(const std::wstring &textline } else { // WTF? throw std::format(L"Invalid patch mode '%c' in: %ls", sign, line.c_str()); - return std::vector(); + return std::list(); } text.erase(text.begin()); } diff --git a/cpp/diff_match_patch.h b/cpp/diff_match_patch.h index e7f269f..62c9b13 100644 --- a/cpp/diff_match_patch.h +++ b/cpp/diff_match_patch.h @@ -27,7 +27,7 @@ * @author fraser@google.com (Neil Fraser) * #include - #include + #include #include #include #include @@ -94,7 +94,7 @@ class Diff { */ class Patch { public: - std::vector diffs; + std::list diffs; int start1; int start2; int length1; @@ -164,7 +164,7 @@ class diff_match_patch { * @param text2 New string to be diffed. * @return Linked List of Diff objects. 
*/ - std::vector diff_main(const std::wstring &text1, const std::wstring &text2); + std::list diff_main(const std::wstring &text1, const std::wstring &text2); /** * Find the differences between two texts. @@ -175,7 +175,7 @@ class diff_match_patch { * If true, then run a faster slightly less optimal diff. * @return Linked List of Diff objects. */ - std::vector diff_main(const std::wstring &text1, const std::wstring &text2, bool checklines); + std::list diff_main(const std::wstring &text1, const std::wstring &text2, bool checklines); /** * Find the differences between two texts. Simplifies the problem by @@ -190,7 +190,7 @@ class diff_match_patch { * @return Linked List of Diff objects. */ private: - std::vector diff_main(const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline); + std::list diff_main(const std::wstring &text1, const std::wstring &text2, bool checklines, clock_t deadline); /** * Find the differences between two texts. Assumes that the texts do not @@ -204,7 +204,7 @@ class diff_match_patch { * @return Linked List of Diff objects. */ private: - std::vector diff_compute(std::wstring text1, std::wstring text2, bool checklines, clock_t deadline); + std::list diff_compute(std::wstring text1, std::wstring text2, bool checklines, clock_t deadline); /** * Do a quick line-level diff on both strings, then rediff the parts for @@ -216,7 +216,7 @@ class diff_match_patch { * @return Linked List of Diff objects. */ private: - std::vector diff_lineMode(std::wstring text1, std::wstring text2, clock_t deadline); + std::list diff_lineMode(std::wstring text1, std::wstring text2, clock_t deadline); /** * Find the 'middle snake' of a diff, split the problem in two @@ -227,7 +227,7 @@ class diff_match_patch { * @return Linked List of Diff objects. 
*/ protected: - std::vector diff_bisect(const std::wstring &text1, const std::wstring &text2, clock_t deadline); + std::list diff_bisect(const std::wstring &text1, const std::wstring &text2, clock_t deadline); /** * Given the location of the 'middle snake', split the diff in two parts @@ -240,7 +240,7 @@ class diff_match_patch { * @return LinkedList of Diff objects. */ private: - std::vector diff_bisectSplit(const std::wstring &text1, const std::wstring &text2, int x, int y, clock_t deadline); + std::list diff_bisectSplit(const std::wstring &text1, const std::wstring &text2, int x, int y, clock_t deadline); /** * Split two texts into a list of strings. Reduce the texts to a string of @@ -252,7 +252,7 @@ class diff_match_patch { * of the List of unique strings is intentionally blank. */ protected: - std::vector diff_linesToChars(const std::wstring &text1, const std::wstring &text2); // return elems 0 and 1 are std::wstring, elem 2 is std::wstring_list + std::list diff_linesToChars(const std::wstring &text1, const std::wstring &text2); // return elems 0 and 1 are std::wstring, elem 2 is std::wstring_list /** * Split a text into a list of strings. Reduce the texts to a string of @@ -273,7 +273,7 @@ class diff_match_patch { * @param lineArray List of unique strings. */ private: - void diff_charsToLines(std::vector &diffs, const std::wstring_list &lineArray); + void diff_charsToLines(std::list &diffs, const std::wstring_list &lineArray); /** * Determine the common prefix of two strings. @@ -334,7 +334,7 @@ class diff_match_patch { * @param diffs LinkedList of Diff objects. */ public: - void diff_cleanupSemantic(std::vector &diffs); + void diff_cleanupSemantic(std::list &diffs); /** * Look for single edits surrounded on both sides by equalities @@ -343,7 +343,7 @@ class diff_match_patch { * @param diffs LinkedList of Diff objects. 
*/ public: - void diff_cleanupSemanticLossless(std::vector &diffs); + void diff_cleanupSemanticLossless(std::list &diffs); /** * Given two strings, compute a score representing whether the internal @@ -361,7 +361,7 @@ class diff_match_patch { * @param diffs LinkedList of Diff objects. */ public: - void diff_cleanupEfficiency(std::vector &diffs); + void diff_cleanupEfficiency(std::list &diffs); /** * Reorder and merge like edit sections. Merge equalities. @@ -369,7 +369,7 @@ class diff_match_patch { * @param diffs LinkedList of Diff objects. */ public: - void diff_cleanupMerge(std::vector &diffs); + void diff_cleanupMerge(std::list &diffs); /** * loc is a location in text1, compute and return the equivalent location in @@ -380,7 +380,7 @@ class diff_match_patch { * @return Location within text2. */ public: - int diff_xIndex(const std::vector &diffs, int loc); + int diff_xIndex(const std::list &diffs, int loc); /** * Convert a Diff list into a pretty HTML report. @@ -388,7 +388,7 @@ class diff_match_patch { * @return HTML representation. */ public: - std::wstring diff_prettyHtml(const std::vector &diffs); + std::wstring diff_prettyHtml(const std::list &diffs); /** * Compute and return the source text (all equalities and deletions). @@ -396,7 +396,7 @@ class diff_match_patch { * @return Source text. */ public: - std::wstring diff_text1(const std::vector &diffs); + std::wstring diff_text1(const std::list &diffs); /** * Compute and return the destination text (all equalities and insertions). @@ -404,7 +404,7 @@ class diff_match_patch { * @return Destination text. */ public: - std::wstring diff_text2(const std::vector &diffs); + std::wstring diff_text2(const std::list &diffs); /** * Compute the Levenshtein distance; the number of inserted, deleted or @@ -413,7 +413,7 @@ class diff_match_patch { * @return Number of changes. 
*/ public: - int diff_levenshtein(const std::vector &diffs); + int diff_levenshtein(const std::list &diffs); /** * Crush the diff into an encoded string which describes the operations @@ -424,7 +424,7 @@ class diff_match_patch { * @return Delta text. */ public: - std::wstring diff_toDelta(const std::vector &diffs); + std::wstring diff_toDelta(const std::list &diffs); /** * Given the original text1, and an encoded string which describes the @@ -435,7 +435,7 @@ class diff_match_patch { * @throws std::wstring If invalid input. */ public: - std::vector diff_fromDelta(const std::wstring &text1, const std::wstring &delta); + std::list diff_fromDelta(const std::wstring &text1, const std::wstring &delta); // MATCH FUNCTIONS @@ -503,7 +503,7 @@ class diff_match_patch { * @return LinkedList of Patch objects. */ public: - std::vector patch_make(const std::wstring &text1, const std::wstring &text2); + std::list patch_make(const std::wstring &text1, const std::wstring &text2); /** * Compute a list of patches to turn text1 into text2. @@ -512,7 +512,7 @@ class diff_match_patch { * @return LinkedList of Patch objects. */ public: - std::vector patch_make(const std::vector &diffs); + std::list patch_make(const std::list &diffs); /** * Compute a list of patches to turn text1 into text2. @@ -521,10 +521,10 @@ class diff_match_patch { * @param text2 Ignored. * @param diffs Array of diff tuples for text1 to text2. * @return LinkedList of Patch objects. - * @deprecated Prefer patch_make(const std::wstring &text1, const std::vector &diffs). + * @deprecated Prefer patch_make(const std::wstring &text1, const std::list &diffs). */ public: - std::vector patch_make(const std::wstring &text1, const std::wstring &text2, const std::vector &diffs); + std::list patch_make(const std::wstring &text1, const std::wstring &text2, const std::list &diffs); /** * Compute a list of patches to turn text1 into text2. @@ -534,7 +534,7 @@ class diff_match_patch { * @return LinkedList of Patch objects. 
*/ public: - std::vector patch_make(const std::wstring &text1, const std::vector &diffs); + std::list patch_make(const std::wstring &text1, const std::list &diffs); /** * Given an array of patches, return another array that is identical. @@ -542,7 +542,7 @@ class diff_match_patch { * @return Array of patch objects. */ public: - std::vector patch_deepCopy(std::vector &patches); + std::list patch_deepCopy(std::list &patches); /** * Merge a set of patches onto the text. Return a patched text, as well @@ -553,7 +553,7 @@ class diff_match_patch { * boolean values. */ public: - std::pair > patch_apply(std::vector &patches, const std::wstring &text); + std::pair > patch_apply(std::list &patches, const std::wstring &text); /** * Add some padding on text start and end so that edges can match something. @@ -562,7 +562,7 @@ class diff_match_patch { * @return The padding string added to each side. */ public: - std::wstring patch_addPadding(std::vector &patches); + std::wstring patch_addPadding(std::list &patches); /** * Look through the patches and break up any which are longer than the @@ -571,7 +571,7 @@ class diff_match_patch { * @param patches LinkedList of Patch objects. */ public: - void patch_splitMax(std::vector &patches); + void patch_splitMax(std::list &patches); /** * Take a list of patches and return a textual representation. @@ -579,7 +579,7 @@ class diff_match_patch { * @return Text representation of patches. */ public: - std::wstring patch_toText(const std::vector &patches); + std::wstring patch_toText(const std::list &patches); /** * Parse a textual representation of patches and return a List of Patch @@ -589,7 +589,7 @@ class diff_match_patch { * @throws std::wstring If invalid input. */ public: - std::vector patch_fromText(const std::wstring &textline); + std::list patch_fromText(const std::wstring &textline); /** * A safer version of std::wstring.mid(pos). 
This one returns "" instead of diff --git a/cpp/diff_match_patch_test.cpp b/cpp/diff_match_patch_test.cpp index 02aac9a..cc2464d 100644 --- a/cpp/diff_match_patch_test.cpp +++ b/cpp/diff_match_patch_test.cpp @@ -18,7 +18,7 @@ // Code known to compile and run with Qt 4.3 through Qt 4.7. #include -#include +#include #include #include "diff_match_patch.h" #include "diff_match_patch_test.h" @@ -142,54 +142,54 @@ void diff_match_patch_test::testDiffHalfmatch() { void diff_match_patch_test::testDiffLinesToChars() { // Convert lines down to characters. - std::wstring_list tmpVector; - std::vector tmpVarList; - tmpVector.push_back(L""); - tmpVector.push_back(L"alpha\n"); - tmpVector.push_back(L"beta\n"); + std::wstring_list tmpList; + std::list tmpVarList; + tmpList.push_back(L""); + tmpList.push_back(L"alpha\n"); + tmpList.push_back(L"beta\n"); tmpVarList.push_back(std::dmp_variant(L"\u0001\u0002\u0001")); //((L"\u0001\u0002\u0001")); tmpVarList.push_back(std::dmp_variant(L"\u0002\u0001\u0002")); // ((L"\u0002\u0001\u0002")); - tmpVarList.push_back(std::dmp_variant(tmpVector)); + tmpVarList.push_back(std::dmp_variant(tmpList)); assertEquals(L"diff_linesToChars:", tmpVarList, dmp.diff_linesToChars(L"alpha\nbeta\nalpha\n", L"beta\nalpha\nbeta\n")); - tmpVector.clear(); + tmpList.clear(); tmpVarList.clear(); - tmpVector.push_back(L""); - tmpVector.push_back(L"alpha\r\n"); - tmpVector.push_back(L"beta\r\n"); - tmpVector.push_back(L"\r\n"); + tmpList.push_back(L""); + tmpList.push_back(L"alpha\r\n"); + tmpList.push_back(L"beta\r\n"); + tmpList.push_back(L"\r\n"); tmpVarList.push_back(std::dmp_variant(std::wstring(L""))); tmpVarList.push_back(std::dmp_variant(L"\u0001\u0002\u0003\u0003")); // ((L"\u0001\u0002\u0003\u0003")); - tmpVarList.push_back(std::dmp_variant(tmpVector)); + tmpVarList.push_back(std::dmp_variant(tmpList)); assertEquals(L"diff_linesToChars:", tmpVarList, dmp.diff_linesToChars(L"", L"alpha\r\nbeta\r\n\r\n\r\n")); - tmpVector.clear(); + tmpList.clear(); 
tmpVarList.clear(); - tmpVector.push_back(L""); - tmpVector.push_back(L"a"); - tmpVector.push_back(L"b"); + tmpList.push_back(L""); + tmpList.push_back(L"a"); + tmpList.push_back(L"b"); tmpVarList.push_back(std::dmp_variant(L"\u0001")); // ((L"\u0001"))); tmpVarList.push_back(std::dmp_variant(L"\u0002")); // ((L"\u0002")); - tmpVarList.push_back(std::dmp_variant(tmpVector)); + tmpVarList.push_back(std::dmp_variant(tmpList)); assertEquals(L"diff_linesToChars:", tmpVarList, dmp.diff_linesToChars(L"a", L"b")); // More than 256 to reveal any 8-bit limitations. int n = 300; - tmpVector.clear(); + tmpList.clear(); tmpVarList.clear(); std::wstring lines; std::wstring chars; for (int x = 1; x < n + 1; x++) { - tmpVector.push_back(std::to_wstring(x) + L"\n"); + tmpList.push_back(std::to_wstring(x) + L"\n"); lines += std::to_wstring(x) + L"\n"; chars += wchar_t(x); } - assertEquals(L"diff_linesToChars: More than 256 (setup).", n, tmpVector.size()); + assertEquals(L"diff_linesToChars: More than 256 (setup).", n, tmpList.size()); assertEquals(L"diff_linesToChars: More than 256 (setup).", n, chars.length()); - tmpVector.insert(tmpVector.begin(), L""); + tmpList.insert(tmpList.begin(), L""); tmpVarList.push_back(std::dmp_variant(chars)); tmpVarList.push_back(std::dmp_variant(std::wstring(L""))); - tmpVarList.push_back(std::dmp_variant(tmpVector)); + tmpVarList.push_back(std::dmp_variant(tmpList)); assertEquals(L"diff_linesToChars: More than 256.", tmpVarList, dmp.diff_linesToChars(lines, L"")); } @@ -200,38 +200,38 @@ void diff_match_patch_test::testDiffCharsToLines() { assertEquals(L"diff_charsToLines:", Diff(EQUAL, L"a"), Diff(EQUAL, L"a")); // Convert chars up to lines. 
- std::vector diffs; + std::list diffs; diffs.push_back(Diff(EQUAL, L"\u0001\u0002\u0001")); // (L"\u0001\u0002\u0001"); diffs.push_back(Diff(INSERT, L"\u0002\u0001\u0002")); // (L"\u0002\u0001\u0002"); - std::wstring_list tmpVector; - tmpVector.push_back(L""); - tmpVector.push_back(L"alpha\n"); - tmpVector.push_back(L"beta\n"); - dmp.diff_charsToLines(diffs, tmpVector); + std::wstring_list tmpList; + tmpList.push_back(L""); + tmpList.push_back(L"alpha\n"); + tmpList.push_back(L"beta\n"); + dmp.diff_charsToLines(diffs, tmpList); assertEquals(L"diff_charsToLines:", diffList(Diff(EQUAL, L"alpha\nbeta\nalpha\n"), Diff(INSERT, L"beta\nalpha\nbeta\n")), diffs); // More than 256 to reveal any 8-bit limitations. int n = 300; - tmpVector.clear(); - std::vector tmpVarList; + tmpList.clear(); + std::list tmpVarList; std::wstring lines; std::wstring chars; for (int x = 1; x < n + 1; x++) { - tmpVector.push_back(std::to_wstring(x) + L"\n"); + tmpList.push_back(std::to_wstring(x) + L"\n"); lines += std::to_wstring(x) + L"\n"; chars += wchar_t(x); } - assertEquals(L"diff_linesToChars: More than 256 (setup).", n, tmpVector.size()); + assertEquals(L"diff_linesToChars: More than 256 (setup).", n, tmpList.size()); assertEquals(L"diff_linesToChars: More than 256 (setup).", n, chars.length()); - tmpVector.insert(tmpVector.begin(), L""); + tmpList.insert(tmpList.begin(), L""); diffs = diffList(Diff(DELETE, chars)); - dmp.diff_charsToLines(diffs, tmpVector); + dmp.diff_charsToLines(diffs, tmpList); assertEquals(L"diff_charsToLines: More than 256.", diffList(Diff(DELETE, lines)), diffs); } void diff_match_patch_test::testDiffCleanupMerge() { // Cleanup a messy diff. - std::vector diffs; + std::list diffs; dmp.diff_cleanupMerge(diffs); assertEquals(L"diff_cleanupMerge: Null case.", diffList(), diffs); @@ -282,7 +282,7 @@ void diff_match_patch_test::testDiffCleanupMerge() { void diff_match_patch_test::testDiffCleanupSemanticLossless() { // Slide diffs to match logical boundaries. 
- std::vector diffs = diffList(); + std::list diffs = diffList(); dmp.diff_cleanupSemanticLossless(diffs); assertEquals(L"diff_cleanupSemantic: Null case.", diffList(), diffs); @@ -317,7 +317,7 @@ void diff_match_patch_test::testDiffCleanupSemanticLossless() { void diff_match_patch_test::testDiffCleanupSemantic() { // Cleanup semantically trivial equalities. - std::vector diffs = diffList(); + std::list diffs = diffList(); dmp.diff_cleanupSemantic(diffs); assertEquals(L"diff_cleanupSemantic: Null case.", diffList(), diffs); @@ -365,7 +365,7 @@ void diff_match_patch_test::testDiffCleanupSemantic() { void diff_match_patch_test::testDiffCleanupEfficiency() { // Cleanup operationally trivial equalities. dmp.Diff_EditCost = 4; - std::vector diffs = diffList(); + std::list diffs = diffList(); dmp.diff_cleanupEfficiency(diffs); assertEquals(L"diff_cleanupEfficiency: Null case.", diffList(), diffs); @@ -394,20 +394,20 @@ void diff_match_patch_test::testDiffCleanupEfficiency() { void diff_match_patch_test::testDiffPrettyHtml() { // Pretty print. - std::vector diffs = diffList(Diff(EQUAL, L"a\n"), Diff(DELETE, L"b"), Diff(INSERT, L"c&d")); + std::list diffs = diffList(Diff(EQUAL, L"a\n"), Diff(DELETE, L"b"), Diff(INSERT, L"c&d")); assertEquals(L"diff_prettyHtml:", L"
<B>b</B>c&d", dmp.diff_prettyHtml(diffs)); } void diff_match_patch_test::testDiffText() { // Compute the source and destination texts. - std::vector diffs = diffList(Diff(EQUAL, L"jump"), Diff(DELETE, L"s"), Diff(INSERT, L"ed"), Diff(EQUAL, L" over "), Diff(DELETE, L"the"), Diff(INSERT, L"a"), Diff(EQUAL, L" lazy")); + std::list diffs = diffList(Diff(EQUAL, L"jump"), Diff(DELETE, L"s"), Diff(INSERT, L"ed"), Diff(EQUAL, L" over "), Diff(DELETE, L"the"), Diff(INSERT, L"a"), Diff(EQUAL, L" lazy")); assertEquals(L"diff_text1:", L"jumps over the lazy", dmp.diff_text1(diffs)); assertEquals(L"diff_text2:", L"jumped over a lazy", dmp.diff_text2(diffs)); } void diff_match_patch_test::testDiffDelta() { // Convert a diff into delta string. - std::vector diffs = diffList(Diff(EQUAL, L"jump"), Diff(DELETE, L"s"), Diff(INSERT, L"ed"), Diff(EQUAL, L" over "), Diff(DELETE, L"the"), Diff(INSERT, L"a"), Diff(EQUAL, L" lazy"), Diff(INSERT, L"old dog")); + std::list diffs = diffList(Diff(EQUAL, L"jump"), Diff(DELETE, L"s"), Diff(INSERT, L"ed"), Diff(EQUAL, L" over "), Diff(DELETE, L"the"), Diff(INSERT, L"a"), Diff(EQUAL, L" lazy"), Diff(INSERT, L"old dog")); std::wstring text1 = dmp.diff_text1(diffs); assertEquals(L"diff_text1: Base text.", L"jumps over the lazy", text1); @@ -467,7 +467,7 @@ void diff_match_patch_test::testDiffDelta() { void diff_match_patch_test::testDiffXIndex() { // Translate a location in text1 to text2. 
- std::vector diffs = diffList(Diff(DELETE, L"a"), Diff(INSERT, L"1234"), Diff(EQUAL, L"xyz")); + std::list diffs = diffList(Diff(DELETE, L"a"), Diff(INSERT, L"1234"), Diff(EQUAL, L"xyz")); assertEquals(L"diff_xIndex: Translation on equality.", 5, dmp.diff_xIndex(diffs, 2)); diffs = diffList(Diff(EQUAL, L"a"), Diff(DELETE, L"1234"), Diff(EQUAL, L"xyz")); @@ -475,7 +475,7 @@ void diff_match_patch_test::testDiffXIndex() { } void diff_match_patch_test::testDiffLevenshtein() { - std::vector diffs = diffList(Diff(DELETE, L"abc"), Diff(INSERT, L"1234"), Diff(EQUAL, L"xyz")); + std::list diffs = diffList(Diff(DELETE, L"abc"), Diff(INSERT, L"1234"), Diff(EQUAL, L"xyz")); assertEquals(L"diff_levenshtein: Trailing equality.", 4, dmp.diff_levenshtein(diffs)); diffs = diffList(Diff(EQUAL, L"xyz"), Diff(DELETE, L"abc"), Diff(INSERT, L"1234")); @@ -492,7 +492,7 @@ void diff_match_patch_test::testDiffBisect() { // Since the resulting diff hasn't been normalized, it would be ok if // the insertion and deletion pairs are swapped. // If the order changes, tweak this test as required. - std::vector diffs = diffList(Diff(DELETE, L"c"), Diff(INSERT, L"m"), Diff(EQUAL, L"a"), Diff(DELETE, L"t"), Diff(INSERT, L"p")); + std::list diffs = diffList(Diff(DELETE, L"c"), Diff(INSERT, L"m"), Diff(EQUAL, L"a"), Diff(DELETE, L"t"), Diff(INSERT, L"p")); assertEquals(L"diff_bisect: Normal.", diffs, dmp.diff_bisect(a, b, std::numeric_limits::max())); // Timeout. @@ -502,7 +502,7 @@ void diff_match_patch_test::testDiffBisect() { void diff_match_patch_test::testDiffMain() { // Perform a trivial diff. 
- std::vector diffs = diffList(); + std::list diffs = diffList(); assertEquals(L"diff_main: Null case.", diffs, dmp.diff_main(L"", L"", false)); diffs = diffList(Diff(EQUAL, L"abc")); @@ -698,13 +698,13 @@ void diff_match_patch_test::testPatchFromText() { assertTrue(L"patch_fromText: #0.", dmp.patch_fromText(L"").size() == 0); std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; - assertEquals(L"patch_fromText: #1.", strp, dmp.patch_fromText(strp)[0].toString()); + assertEquals(L"patch_fromText: #1.", strp, dmp.patch_fromText(strp).begin()->toString()); - assertEquals(L"patch_fromText: #2.", L"@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText(L"@@ -1 +1 @@\n-a\n+b\n")[0].toString()); + assertEquals(L"patch_fromText: #2.", L"@@ -1 +1 @@\n-a\n+b\n", dmp.patch_fromText(L"@@ -1 +1 @@\n-a\n+b\n").begin()->toString()); - assertEquals(L"patch_fromText: #3.", L"@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText(L"@@ -1,3 +0,0 @@\n-abc\n")[0].toString()); + assertEquals(L"patch_fromText: #3.", L"@@ -1,3 +0,0 @@\n-abc\n", dmp.patch_fromText(L"@@ -1,3 +0,0 @@\n-abc\n").begin()->toString()); - assertEquals(L"patch_fromText: #4.", L"@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText(L"@@ -0,0 +1,3 @@\n+abc\n")[0].toString()); + assertEquals(L"patch_fromText: #4.", L"@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText(L"@@ -0,0 +1,3 @@\n+abc\n").begin()->toString()); // Generates error. 
try { @@ -717,7 +717,7 @@ void diff_match_patch_test::testPatchFromText() { void diff_match_patch_test::testPatchToText() { std::wstring strp = L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; - std::vector patches; + std::list patches; patches = dmp.patch_fromText(strp); assertEquals(L"patch_toText: Single", strp, dmp.patch_toText(patches)); @@ -728,26 +728,25 @@ void diff_match_patch_test::testPatchToText() { void diff_match_patch_test::testPatchAddContext() { dmp.Patch_Margin = 4; - Patch p; - p = dmp.patch_fromText(L"@@ -21,4 +21,10 @@\n-jump\n+somersault\n")[0]; + Patch & p = dmp.patch_fromText(L"@@ -21,4 +21,10 @@\n-jump\n+somersault\n").front(); dmp.patch_addContext(p, L"The quick brown fox jumps over the lazy dog."); assertEquals(L"patch_addContext: Simple case.", L"@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.toString()); - p = dmp.patch_fromText(L"@@ -21,4 +21,10 @@\n-jump\n+somersault\n")[0]; + p = dmp.patch_fromText(L"@@ -21,4 +21,10 @@\n-jump\n+somersault\n").front(); dmp.patch_addContext(p, L"The quick brown fox jumps."); assertEquals(L"patch_addContext: Not enough trailing context.", L"@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", p.toString()); - p = dmp.patch_fromText(L"@@ -3 +3,2 @@\n-e\n+at\n")[0]; + p = dmp.patch_fromText(L"@@ -3 +3,2 @@\n-e\n+at\n").front(); dmp.patch_addContext(p, L"The quick brown fox jumps."); assertEquals(L"patch_addContext: Not enough leading context.", L"@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString()); - p = dmp.patch_fromText(L"@@ -3 +3,2 @@\n-e\n+at\n")[0]; + p = dmp.patch_fromText(L"@@ -3 +3,2 @@\n-e\n+at\n").front(); dmp.patch_addContext(p, L"The quick brown fox jumps. The quick brown fox crashes."); assertEquals(L"patch_addContext: Ambiguity.", L"@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. 
\n", p.toString()); } void diff_match_patch_test::testPatchMake() { - std::vector patches; + std::list patches; patches = dmp.patch_make(L"", L""); assertEquals(L"patch_make: Null case", L"", dmp.patch_toText(patches)); @@ -762,7 +761,7 @@ void diff_match_patch_test::testPatchMake() { patches = dmp.patch_make(text1, text2); assertEquals(L"patch_make: Text1+Text2 inputs", expectedPatch, dmp.patch_toText(patches)); - std::vector diffs = dmp.diff_main(text1, text2, false); + std::list diffs = dmp.diff_main(text1, text2, false); patches = dmp.patch_make(diffs); assertEquals(L"patch_make: Diff input", expectedPatch, dmp.patch_toText(patches)); @@ -776,7 +775,7 @@ void diff_match_patch_test::testPatchMake() { assertEquals(L"patch_toText: Character encoding.", L"@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", dmp.patch_toText(patches)); diffs = diffList(Diff(DELETE, L"`1234567890-=[]\\;',./"), Diff(INSERT, L"~!@#$%^&*()_+{}|:\"<>?")); - assertEquals(L"patch_fromText: Character decoding.", diffs, dmp.patch_fromText(L"@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n")[0].diffs); + assertEquals(L"patch_fromText: Character decoding.", diffs, dmp.patch_fromText(L"@@ -1,21 +1,21 @@\n-%601234567890-=%5B%5D%5C;',./\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n").front().diffs); text1 = L""; for (int x = 0; x < 100; x++) { @@ -798,7 +797,7 @@ void diff_match_patch_test::testPatchMake() { void diff_match_patch_test::testPatchSplitMax() { // Assumes that Match_MaxBits is 32. 
- std::vector patches; + std::list patches; patches = dmp.patch_make(L"abcdefghijklmnopqrstuvwxyz01234567890", L"XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0"); dmp.patch_splitMax(patches); assertEquals(L"patch_splitMax: #1.", L"@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", dmp.patch_toText(patches)); @@ -818,7 +817,7 @@ void diff_match_patch_test::testPatchSplitMax() { } void diff_match_patch_test::testPatchAddPadding() { - std::vector patches; + std::list patches; patches = dmp.patch_make(L"", L"test"); assertEquals(L"patch_addPadding: Both edges full.", L"@@ -0,0 +1,4 @@\n+test\n", dmp.patch_toText(patches)); dmp.patch_addPadding(patches); @@ -839,7 +838,7 @@ void diff_match_patch_test::testPatchApply() { dmp.Match_Distance = 1000; dmp.Match_Threshold = 0.5f; dmp.Patch_DeleteThreshold = 0.5f; - std::vector patches; + std::list patches; patches = dmp.patch_make(L"", L""); std::pair > results = dmp.patch_apply(patches, L"Hello world."); std::vector boolArray = results.second; @@ -949,12 +948,14 @@ void diff_match_patch_test::assertEquals(const std::wstring &strCase, const Diff std::debug_print(L"%ls OK", qPrintable(strCase)); } -void diff_match_patch_test::assertEquals(const std::wstring &strCase, const std::vector &list1, const std::vector &list2) { +void diff_match_patch_test::assertEquals(const std::wstring &strCase, const std::list &list1, const std::list &list2) { bool fail = false; if (list1.size() == list2.size()) { int i = 0; - for(Diff d1: list1) { - Diff d2 = list2[i]; + for(const Diff & d1: list1) { + auto it = list2.begin(); + std::advance(it, i); + const Diff & d2 = *it; if (d1 != d2) { fail = true; break; @@ -994,12 +995,15 @@ void diff_match_patch_test::assertEquals(const std::wstring &strCase, const std: std::debug_print(L"%ls OK", qPrintable(strCase)); } -void 
diff_match_patch_test::assertEquals(const std::wstring &strCase, const std::vector &list1, const std::vector &list2) { +void diff_match_patch_test::assertEquals(const std::wstring &strCase, const std::list &list1, const std::list &list2) { bool fail = false; if (list1.size() == list2.size()) { int i = 0; - for(std::dmp_variant q1: list1) { - std::dmp_variant q2 = list2[i]; + for(const auto & q1: list1) { + auto it = list2.begin(); + std::advance(it, i); + + const auto & q2 = *it; if (q1 != q2) { std::debug_print(L"variable %d not equal, (%ls)_____________ (%ls)", i, qPrintable(var_to_string(q1)), qPrintable(var_to_string(q2))); fail = true; @@ -1103,7 +1107,7 @@ void diff_match_patch_test::assertFalse(const std::wstring &strCase, bool value) // Construct the two texts which made up the diff originally. -std::wstring_list diff_match_patch_test::diff_rebuildtexts(std::vector diffs) { +std::wstring_list diff_match_patch_test::diff_rebuildtexts(std::list diffs) { std::wstring_list text; text.push_back(std::wstring(L"")); text.push_back(std::wstring(L"")); @@ -1126,10 +1130,10 @@ void diff_match_patch_test::assertEmpty(const std::wstring &strCase, const std:: // Private function for quickly building lists of diffs. -std::vector diff_match_patch_test::diffList(Diff d1, Diff d2, Diff d3, Diff d4, Diff d5, +std::list diff_match_patch_test::diffList(Diff d1, Diff d2, Diff d3, Diff d4, Diff d5, Diff d6, Diff d7, Diff d8, Diff d9, Diff d10) { // Diff(INSERT, NULL) is invalid and thus is used as the default argument. 
- std::vector listRet; + std::list listRet; if (d1.operation == INSERT && d1.invalid) { return listRet; } diff --git a/cpp/diff_match_patch_test.h b/cpp/diff_match_patch_test.h index f4bdc51..88f7fc4 100644 --- a/cpp/diff_match_patch_test.h +++ b/cpp/diff_match_patch_test.h @@ -65,8 +65,8 @@ class diff_match_patch_test { void assertEquals(const std::wstring &strCase, int n1, int n2); void assertEquals(const std::wstring &strCase, const std::wstring &s1, const std::wstring &s2); void assertEquals(const std::wstring &strCase, const Diff &d1, const Diff &d2); - void assertEquals(const std::wstring &strCase, const std::vector &list1, const std::vector &list2); - void assertEquals(const std::wstring &strCase, const std::vector &list1, const std::vector &list2); + void assertEquals(const std::wstring &strCase, const std::list &list1, const std::list &list2); + void assertEquals(const std::wstring &strCase, const std::list &list1, const std::list &list2); void assertEquals(const std::wstring &strCase, const std::dmp_variant &var1, const std::dmp_variant &var2); void assertEquals(const std::wstring &strCase, const std::unordered_map &m1, const std::unordered_map &m2); void assertEquals(const std::wstring &strCase, const std::wstring_list &list1, const std::wstring_list &list2); @@ -75,9 +75,9 @@ class diff_match_patch_test { void assertEmpty(const std::wstring &strCase, const std::wstring_list &list); // Construct the two texts which made up the diff originally. - std::wstring_list diff_rebuildtexts(std::vector diffs); + std::wstring_list diff_rebuildtexts(std::list diffs); // Private function for quickly building lists of diffs. - std::vector diffList( + std::list diffList( // Diff(INSERT, NULL) is invalid and thus is used as the default argument. 
Diff d1 = Diff(INSERT, NULL), Diff d2 = Diff(INSERT, NULL), Diff d3 = Diff(INSERT, NULL), Diff d4 = Diff(INSERT, NULL), diff --git a/cpp/diff_match_patch_util.cpp b/cpp/diff_match_patch_util.cpp index 88fd563..7c0a859 100644 --- a/cpp/diff_match_patch_util.cpp +++ b/cpp/diff_match_patch_util.cpp @@ -9,7 +9,7 @@ #include #include #include - +#include #include "diff_match_patch_util.h" diff --git a/cpp/diff_match_patch_util.h b/cpp/diff_match_patch_util.h index e53bccd..08fa38a 100644 --- a/cpp/diff_match_patch_util.h +++ b/cpp/diff_match_patch_util.h @@ -1,10 +1,11 @@ #pragma once #include -#include +#include #include #include #include +#include namespace std { using wstring_list = std::vector; From 69d2a661ddc869605e0ff13b96408dc1061c4d6e Mon Sep 17 00:00:00 2001 From: stonewell Date: Sat, 27 Apr 2019 22:04:10 -0700 Subject: [PATCH 5/7] fix tests more --- cpp/diff_match_patch.cpp | 223 +++++++++++----------------------- cpp/diff_match_patch_test.cpp | 10 ++ cpp/diff_match_patch_util.h | 26 +++- 3 files changed, 105 insertions(+), 154 deletions(-) diff --git a/cpp/diff_match_patch.cpp b/cpp/diff_match_patch.cpp index b4ef4b4..28bd8ed 100644 --- a/cpp/diff_match_patch.cpp +++ b/cpp/diff_match_patch.cpp @@ -44,7 +44,7 @@ * @param text The text being applied */ Diff::Diff(Operation _operation, const std::wstring &_text) : - operation(_operation), text(_text), invalid{true} { + operation(_operation), text(_text), invalid{false} { // Construct a diff with the specified operation and text. 
} @@ -158,7 +158,7 @@ std::wstring Patch::toString() { text += std::wstring(L" "); break; } - text += std::url_encode(aDiff.text, " !~*'();/?:@&=+$,#") + text += std::url_encode(aDiff.text, " !~*'();/?:@&=+$,#-_.") + std::wstring(L"\n"); } @@ -184,12 +184,12 @@ diff_match_patch::diff_match_patch() : std::list diff_match_patch::diff_main(const std::wstring &text1, - const std::wstring &text2) { + const std::wstring &text2) { return diff_main(text1, text2, true); } std::list diff_match_patch::diff_main(const std::wstring &text1, - const std::wstring &text2, bool checklines) { + const std::wstring &text2, bool checklines) { // Set a deadline by which time the diff must be complete. clock_t deadline; if (Diff_Timeout <= 0) { @@ -201,12 +201,7 @@ std::list diff_match_patch::diff_main(const std::wstring &text1, } std::list diff_match_patch::diff_main(const std::wstring &text1, - const std::wstring &text2, bool checklines, clock_t deadline) { - // Check for null inputs. - if (text1.empty() || text2.empty()) { - throw "Null inputs. (diff_main)"; - } - + const std::wstring &text2, bool checklines, clock_t deadline) { // Check for equality (speedup). std::list diffs; if (text1 == text2) { @@ -246,7 +241,7 @@ std::list diff_match_patch::diff_main(const std::wstring &text1, std::list diff_match_patch::diff_compute(std::wstring text1, std::wstring text2, - bool checklines, clock_t deadline) { + bool checklines, clock_t deadline) { std::list diffs; if (text1.empty()) { @@ -296,9 +291,9 @@ std::list diff_match_patch::diff_compute(std::wstring text1, std::wstring const std::wstring mid_common = *it++; // Send both pairs off for separate processing. const std::list diffs_a = diff_main(text1_a, text2_a, - checklines, deadline); + checklines, deadline); const std::list diffs_b = diff_main(text1_b, text2_b, - checklines, deadline); + checklines, deadline); // Merge the results. 
diffs = diffs_a; diffs.push_back(Diff(EQUAL, mid_common)); @@ -316,7 +311,7 @@ std::list diff_match_patch::diff_compute(std::wstring text1, std::wstring std::list diff_match_patch::diff_lineMode(std::wstring text1, std::wstring text2, - clock_t deadline) { + clock_t deadline) { // Scan the text on a line-by-line basis first. const std::list b = diff_linesToChars(text1, text2); auto it = b.begin(); @@ -340,8 +335,9 @@ std::list diff_match_patch::diff_lineMode(std::wstring text1, std::wstring std::wstring text_insert = L""; std::list::iterator pointer = diffs.begin(); - while (pointer != diffs.end()) { - Diff *thisDiff = &(*pointer); + Diff *thisDiff = std::safe_next_element_ptr(diffs, pointer); + + while (thisDiff != NULL) { switch (thisDiff->operation) { case INSERT: count_insert++; @@ -373,16 +369,16 @@ std::list diff_match_patch::diff_lineMode(std::wstring text1, std::wstring break; } - pointer++; + thisDiff = std::safe_next_element_ptr(diffs, pointer); } - diffs.erase(--diffs.end()); + diffs.pop_back(); return diffs; } std::list diff_match_patch::diff_bisect(const std::wstring &text1, - const std::wstring &text2, clock_t deadline) { + const std::wstring &text2, clock_t deadline) { // Cache the text lengths to prevent multiple calls. 
const int text1_length = text1.length(); const int text2_length = text2.length(); @@ -500,7 +496,7 @@ std::list diff_match_patch::diff_bisect(const std::wstring &text1, } std::list diff_match_patch::diff_bisectSplit(const std::wstring &text1, - const std::wstring &text2, int x, int y, clock_t deadline) { + const std::wstring &text2, int x, int y, clock_t deadline) { std::wstring text1a = text1.substr(0, x); std::wstring text2a = text2.substr(0, y); std::wstring text1b = safeMid(text1, x); @@ -516,7 +512,7 @@ std::list diff_match_patch::diff_bisectSplit(const std::wstring &text1, } std::list diff_match_patch::diff_linesToChars(const std::wstring &text1, - const std::wstring &text2) { + const std::wstring &text2) { std::wstring_list lineArray; std::unordered_map lineHash; // e.g. linearray[4] == L"Hello\n" @@ -750,13 +746,15 @@ void diff_match_patch::diff_cleanupSemantic(std::list &diffs) { std::wstring lastequality; // Always equal to equalities.lastElement().text std::list::iterator pointer = diffs.begin(); // Number of characters that changed prior to the equality. - int length_insertions1 = 0; - int length_deletions1 = 0; + std::wstring::size_type length_insertions1 = 0; + std::wstring::size_type length_deletions1 = 0; // Number of characters that changed after the equality. - int length_insertions2 = 0; - int length_deletions2 = 0; - while (pointer != diffs.end()) { - Diff *thisDiff = &(*pointer); + std::wstring::size_type length_insertions2 = 0; + std::wstring::size_type length_deletions2 = 0; + + Diff *thisDiff = std::safe_next_element_ptr(diffs, pointer); + + while (thisDiff != NULL) { if (thisDiff->operation == EQUAL) { // Equality found. equalities.push_front(*thisDiff); @@ -781,11 +779,9 @@ void diff_match_patch::diff_cleanupSemantic(std::list &diffs) { <= std::max(length_insertions2, length_deletions2))) { // printf("Splitting: '%s'\n", qPrintable(lastequality)); // Walk back to offending equality. 
- while (*thisDiff != equalities.front()) { - pointer--; - thisDiff = &(*pointer); - } - pointer++; + pointer = std::prev(pointer);//move back to this diff + pointer = std::find(diffs.begin(), std::next(pointer), equalities.front()); + thisDiff = &(*pointer); // Replace equality with a delete. *pointer = Diff(DELETE, lastequality); @@ -802,10 +798,8 @@ void diff_match_patch::diff_cleanupSemantic(std::list &diffs) { pointer = diffs.begin(); } else { // There is a safe equality we can fall back to. - thisDiff = &(equalities.front()); - while (*thisDiff != *(pointer--)) { - // Intentionally empty loop. - } + pointer = std::find(diffs.begin(), std::next(pointer), equalities.front()); + thisDiff = &(*pointer); } length_insertions1 = 0; // Reset the counters. @@ -817,7 +811,7 @@ void diff_match_patch::diff_cleanupSemantic(std::list &diffs) { } } - pointer++; + thisDiff = std::safe_next_element_ptr(diffs, pointer); } // Normalize the diff. @@ -833,16 +827,9 @@ void diff_match_patch::diff_cleanupSemantic(std::list &diffs) { // -> defxxxabc // Only extract an overlap if it is as big as the edit ahead or behind it. pointer = diffs.begin(); - Diff *prevDiff = NULL; - Diff *thisDiff = NULL; - if (pointer != diffs.end()) { - prevDiff = &(*pointer); - pointer++; - if (pointer != diffs.end()) { - thisDiff = &(*pointer); - pointer++; - } - } + Diff *prevDiff = std::safe_next_element_ptr(diffs, pointer); + thisDiff = std::safe_next_element_ptr(diffs, pointer); + while (thisDiff != NULL) { if (prevDiff->operation == DELETE && thisDiff->operation == INSERT) { @@ -854,7 +841,7 @@ void diff_match_patch::diff_cleanupSemantic(std::list &diffs) { if (overlap_length1 >= deletion.length() / 2.0 || overlap_length1 >= insertion.length() / 2.0) { // Overlap found. Insert an equality and trim the surrounding edits. 
- pointer--; + pointer = std::prev(pointer); pointer = std::next(diffs.insert(pointer, Diff(EQUAL, insertion.substr(0, overlap_length1)))); prevDiff->text = deletion.substr(0, deletion.length() - overlap_length1); @@ -867,7 +854,7 @@ void diff_match_patch::diff_cleanupSemantic(std::list &diffs) { overlap_length2 >= insertion.length() / 2.0) { // Reverse overlap found. // Insert an equality and swap and trim the surrounding edits. - pointer--; + pointer = std::prev(pointer); pointer = std::next(diffs.insert(pointer, Diff(EQUAL, deletion.substr(0, overlap_length2)))); prevDiff->operation = INSERT; prevDiff->text = @@ -879,32 +866,13 @@ void diff_match_patch::diff_cleanupSemantic(std::list &diffs) { } } - if (pointer != diffs.end()) - pointer++; - - thisDiff = pointer != diffs.end() ? &(*pointer) : NULL; + thisDiff = std::safe_next_element_ptr(diffs, pointer); } prevDiff = thisDiff; - if (pointer != diffs.end()) - pointer++; - thisDiff = pointer != diffs.end() ? &(*pointer) : NULL; + thisDiff = std::safe_next_element_ptr(diffs, pointer); } } -namespace std { -template -T * safe_next_element(const std::list & v, typename std::list::iterator & it) { - if (it != v.end()) { - T * p = &(*it); - it++; - return p; - } - - return nullptr; -} -} - - void diff_match_patch::diff_cleanupSemanticLossless(std::list &diffs) { std::wstring equality1, edit, equality2; std::wstring commonString; @@ -913,13 +881,12 @@ void diff_match_patch::diff_cleanupSemanticLossless(std::list &diffs) { std::wstring bestEquality1, bestEdit, bestEquality2; // Create a new iterator at the start. 
std::list::iterator pointer = diffs.begin(); - Diff *prevDiff = std::safe_next_element(diffs, pointer); - Diff *thisDiff = std::safe_next_element(diffs, pointer); - Diff *nextDiff = std::safe_next_element(diffs, pointer); + Diff *prevDiff = std::safe_next_element_ptr(diffs, pointer); + Diff *thisDiff = std::safe_next_element_ptr(diffs, pointer); + Diff *nextDiff = std::safe_next_element_ptr(diffs, pointer); // Intentionally ignore the first and last element (don't need checking). while (nextDiff != NULL) { - std::cout << "-------" << std::endl; if (prevDiff->operation == EQUAL && nextDiff->operation == EQUAL) { // This is a single edit surrounded by equalities. @@ -930,20 +897,10 @@ void diff_match_patch::diff_cleanupSemanticLossless(std::list &diffs) { // First, shift the edit as far left as possible. commonOffset = diff_commonSuffix(equality1, edit); if (commonOffset != 0) { - std::wcout << L"-------???" << commonString - << ", [" << equality1 - << "], [" << edit - << "], [" << equality2 << "]" - << std::endl; commonString = safeMid(edit, edit.length() - commonOffset); equality1 = equality1.substr(0, equality1.length() - commonOffset); edit = commonString + edit.substr(0, edit.length() - commonOffset); equality2 = commonString + equality2; - std::wcout << L"-------???" << commonString - << ", [" << equality1 - << "], [" << edit - << "], [" << equality2 << "]" - << std::endl; } // Second, step character by character right, looking for the best fit. @@ -967,23 +924,9 @@ void diff_match_patch::diff_cleanupSemanticLossless(std::list &diffs) { bestEquality2 = equality2; } } - std::wcout << L"***-------???" - << "[" << equality1 - << "], [" << edit - << "], [" << equality2 << "]" - << "---------[" << bestEquality1 - << "], [" << bestEdit - << "], [" << bestEquality2 << "]" - << std::endl; if (prevDiff->text != bestEquality1) { // We have an improvement, save it back to the diff. 
- std::wcout << L"++++++++++++++++++" - << nextDiff->text - << ", " << bestEquality2 - << ", " << &diffs.back() - << ", " << nextDiff - << std::endl; if (!bestEquality1.empty()) { prevDiff->text = bestEquality1; } else { @@ -994,12 +937,6 @@ void diff_match_patch::diff_cleanupSemanticLossless(std::list &diffs) { thisDiff->text = bestEdit; if (!bestEquality2.empty()) { nextDiff->text = bestEquality2; - std::wcout << L"++++++++++++++++++" - << nextDiff->text - << ", " << bestEquality2 - << ", " << &diffs.back() - << ", " << nextDiff - << std::endl; } else { pointer = std::prev(pointer);//move back to nextDiff pointer = diffs.erase(pointer); // Delete nextDiff. @@ -1010,7 +947,7 @@ void diff_match_patch::diff_cleanupSemanticLossless(std::list &diffs) { } prevDiff = thisDiff; thisDiff = nextDiff; - nextDiff = std::safe_next_element(diffs, pointer); + nextDiff = std::safe_next_element_ptr(diffs, pointer); } } @@ -1081,7 +1018,7 @@ void diff_match_patch::diff_cleanupEfficiency(std::list &diffs) { // Is there a deletion operation after the last equality. 
bool post_del = false; - Diff *thisDiff = std::safe_next_element(diffs, pointer); + Diff *thisDiff = std::safe_next_element_ptr(diffs, pointer); Diff *safeDiff = thisDiff; while (thisDiff != NULL) { @@ -1163,7 +1100,7 @@ void diff_match_patch::diff_cleanupEfficiency(std::list &diffs) { changes = true; } } - thisDiff = std::safe_next_element(diffs, pointer); + thisDiff = std::safe_next_element_ptr(diffs, pointer); } if (changes) { @@ -1179,7 +1116,7 @@ void diff_match_patch::diff_cleanupMerge(std::list &diffs) { int count_insert = 0; std::wstring text_delete = L""; std::wstring text_insert = L""; - Diff *thisDiff = std::safe_next_element(diffs, pointer); + Diff *thisDiff = std::safe_next_element_ptr(diffs, pointer); Diff *prevEqual = NULL; int commonlength; while (thisDiff != NULL) { @@ -1240,7 +1177,7 @@ void diff_match_patch::diff_cleanupMerge(std::list &diffs) { pointer = std::next(diffs.insert(pointer, Diff(INSERT, text_insert))); } // Step forward to the equality. - thisDiff = std::safe_next_element(diffs, pointer); + thisDiff = std::safe_next_element_ptr(diffs, pointer); } else if (prevEqual != NULL) { // Merge this equality with the previous one. prevEqual->text += thisDiff->text; @@ -1255,7 +1192,7 @@ void diff_match_patch::diff_cleanupMerge(std::list &diffs) { prevEqual = thisDiff; break; } - thisDiff = std::safe_next_element(diffs, pointer); + thisDiff = std::safe_next_element_ptr(diffs, pointer); } if (diffs.back().text.empty()) { @@ -1271,9 +1208,9 @@ void diff_match_patch::diff_cleanupMerge(std::list &diffs) { // Create a new iterator at the start. // (As opposed to walking the current one back.) 
pointer = diffs.begin(); - Diff *prevDiff = std::safe_next_element(diffs, pointer); - thisDiff = std::safe_next_element(diffs, pointer); - Diff *nextDiff = std::safe_next_element(diffs, pointer); + Diff *prevDiff = std::safe_next_element_ptr(diffs, pointer); + thisDiff = std::safe_next_element_ptr(diffs, pointer); + Diff *nextDiff = std::safe_next_element_ptr(diffs, pointer); // Intentionally ignore the first and last element (don't need checking). while (nextDiff != NULL) { @@ -1289,8 +1226,8 @@ void diff_match_patch::diff_cleanupMerge(std::list &diffs) { std::advance(pointer, -3); pointer = diffs.erase(pointer); // Delete prevDiff. pointer = std::next(pointer);//Walk past thisDiff - thisDiff = std::safe_next_element(diffs, pointer); - nextDiff = std::safe_next_element(diffs, pointer); + thisDiff = std::safe_next_element_ptr(diffs, pointer); + nextDiff = std::safe_next_element_ptr(diffs, pointer); changes = true; } else if (std::starts_with(thisDiff->text, nextDiff->text)) { // Shift the edit over the next equality. @@ -1299,13 +1236,13 @@ void diff_match_patch::diff_cleanupMerge(std::list &diffs) { + nextDiff->text; pointer = std::prev(pointer);//back to next diff pointer = diffs.erase(pointer); // Delete nextDiff. - nextDiff = std::safe_next_element(diffs, pointer); + nextDiff = std::safe_next_element_ptr(diffs, pointer); changes = true; } } prevDiff = thisDiff; thisDiff = nextDiff; - nextDiff = std::safe_next_element(diffs, pointer); + nextDiff = std::safe_next_element_ptr(diffs, pointer); } // If shifts were made, the diff needs reordering and another shift sweep. 
if (changes) { @@ -1422,11 +1359,12 @@ int diff_match_patch::diff_levenshtein(const std::list &diffs) { std::wstring diff_match_patch::diff_toDelta(const std::list &diffs) { std::wstring text; - for(Diff aDiff : diffs) { + + for(const auto & aDiff : diffs) { switch (aDiff.operation) { case INSERT: { std::wstring encoded = std::url_encode(aDiff.text, - " !~*'();/?:@&=+$,#"); + " !~*'();/?:@&=+$,#-_."); text += std::wstring(L"+") + encoded + std::wstring(L"\t"); break; } @@ -1449,7 +1387,7 @@ std::wstring diff_match_patch::diff_toDelta(const std::list &diffs) { std::list diff_match_patch::diff_fromDelta(const std::wstring &text1, - const std::wstring &delta) { + const std::wstring &delta) { std::list diffs; int pointer = 0; // Cursor in text1 std::wstring_list tokens = std::split(delta, '\t'); @@ -1500,11 +1438,6 @@ std::list diff_match_patch::diff_fromDelta(const std::wstring &text1, int diff_match_patch::match_main(const std::wstring &text, const std::wstring &pattern, int loc) { - // Check for null inputs. - if (text.empty() || pattern.empty()) { - throw "Null inputs. (match_main)"; - } - loc = std::max((std::wstring::size_type)0, std::min((std::wstring::size_type)loc, text.length())); if (text == pattern) { // Shortcut (potentially not guaranteed by the algorithm) @@ -1702,12 +1635,7 @@ void diff_match_patch::patch_addContext(Patch &patch, const std::wstring &text) std::list diff_match_patch::patch_make(const std::wstring &text1, - const std::wstring &text2) { - // Check for null inputs. - if (text1.empty() || text2.empty()) { - throw "Null inputs. (patch_make)"; - } - + const std::wstring &text2) { // No diffs provided, compute our own. 
std::list diffs = diff_main(text1, text2, true); if (diffs.size() > 2) { @@ -1727,8 +1655,8 @@ std::list diff_match_patch::patch_make(const std::list &diffs) { std::list diff_match_patch::patch_make(const std::wstring &text1, - const std::wstring &text2, - const std::list &diffs) { + const std::wstring &text2, + const std::list &diffs) { // text2 is entirely unused. return patch_make(text1, diffs); @@ -1737,12 +1665,7 @@ std::list diff_match_patch::patch_make(const std::wstring &text1, std::list diff_match_patch::patch_make(const std::wstring &text1, - const std::list &diffs) { - // Check for null inputs. - if (text1.empty()) { - throw "Null inputs. (patch_make)"; - } - + const std::list &diffs) { std::list patches; if (diffs.empty()) { return patches; // Get rid of the null case. @@ -2008,19 +1931,15 @@ void diff_match_patch::patch_splitMax(std::list &patches) { auto pointer = patches.begin(); Patch bigpatch; - if (pointer != patches.end()) { - bigpatch = *pointer; - } + bigpatch = std::safe_next_element(patches, pointer); while (!bigpatch.isNull()) { if (bigpatch.length1 <= patch_size) { - if (pointer != patches.end()) - bigpatch = *pointer++; - else - bigpatch = Patch(); + bigpatch = std::safe_next_element(patches, pointer); continue; } // Remove the big old patch. 
+ pointer = std::prev(pointer); pointer = patches.erase(pointer); start1 = bigpatch.start1; start2 = bigpatch.start2; @@ -2044,7 +1963,7 @@ void diff_match_patch::patch_splitMax(std::list &patches) { patch.length2 += diff_text.length(); start2 += diff_text.length(); patch.diffs.push_back(bigpatch.diffs.front()); - bigpatch.diffs.erase(bigpatch.diffs.begin()); + bigpatch.diffs.pop_front(); empty = false; } else if (diff_type == DELETE && patch.diffs.size() == 1 && patch.diffs.front().operation == EQUAL @@ -2054,7 +1973,7 @@ void diff_match_patch::patch_splitMax(std::list &patches) { start1 += diff_text.length(); empty = false; patch.diffs.push_back(Diff(diff_type, diff_text)); - bigpatch.diffs.erase(bigpatch.diffs.begin()); + bigpatch.diffs.pop_front(); } else { // Deletion or equality. Only take as much as we can stomach. diff_text = diff_text.substr(0, std::min(diff_text.length(), @@ -2069,7 +1988,7 @@ void diff_match_patch::patch_splitMax(std::list &patches) { } patch.diffs.push_back(Diff(diff_type, diff_text)); if (diff_text == bigpatch.diffs.front().text) { - bigpatch.diffs.erase(bigpatch.diffs.begin()); + bigpatch.diffs.pop_front(); } else { bigpatch.diffs.front().text = safeMid(bigpatch.diffs.front().text, diff_text.length()); @@ -2099,10 +2018,8 @@ void diff_match_patch::patch_splitMax(std::list &patches) { pointer = std::next(patches.insert(pointer, patch)); } } - if (pointer != patches.end()) - bigpatch = *pointer++; - else - bigpatch = Patch(); + + bigpatch = std::safe_next_element(patches, pointer); } } diff --git a/cpp/diff_match_patch_test.cpp b/cpp/diff_match_patch_test.cpp index cc2464d..f8ba42d 100644 --- a/cpp/diff_match_patch_test.cpp +++ b/cpp/diff_match_patch_test.cpp @@ -20,6 +20,7 @@ #include #include #include +#include #include "diff_match_patch.h" #include "diff_match_patch_test.h" @@ -74,7 +75,10 @@ void diff_match_patch_test::run_all_tests() { std::debug_print(L"All tests passed."); } catch (std::wstring strCase) { std::debug_print(L"Test 
failed: %ls", qPrintable(strCase)); + } catch (char const *strCase) { + std::cout << "Test failed:" << strCase << std::endl; } + } // DIFF TEST FUNCTIONS @@ -582,6 +586,7 @@ void diff_match_patch_test::testDiffMain() { std::wstring_list texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false)); assertEquals(L"diff_main: Overlap line-mode.", texts_textmode, texts_linemode); + /*invalid case // Test null inputs. try { dmp.diff_main(NULL, NULL); @@ -589,6 +594,7 @@ void diff_match_patch_test::testDiffMain() { } catch (const char* ex) { // Exception expected. } + */ } @@ -669,6 +675,7 @@ void diff_match_patch_test::testMatchMain() { assertEquals(L"match_main: Complex match.", 4, dmp.match_main(L"I am the very model of a modern major general.", L" that berry ", 5)); dmp.Match_Threshold = 0.5f; + /* invalid case for std::wstring // Test null inputs. try { dmp.match_main(NULL, NULL, 0); @@ -676,6 +683,7 @@ void diff_match_patch_test::testMatchMain() { } catch (const char* ex) { // Exception expected. } + */ } @@ -787,12 +795,14 @@ void diff_match_patch_test::testPatchMake() { assertEquals(L"patch_make: Long string with repeats.", expectedPatch, dmp.patch_toText(patches)); // Test null inputs. + /* invalid case try { dmp.patch_make(NULL, NULL); assertFalse(L"patch_make: Null inputs.", true); } catch (const char* ex) { // Exception expected. 
} + */ } void diff_match_patch_test::testPatchSplitMax() { diff --git a/cpp/diff_match_patch_util.h b/cpp/diff_match_patch_util.h index 08fa38a..983e30c 100644 --- a/cpp/diff_match_patch_util.h +++ b/cpp/diff_match_patch_util.h @@ -21,6 +21,8 @@ std::wstring url_encode(const std::wstring &value, const std::string & exclude=" std::string url_decode(const std::string &value); std::wstring url_decode(const std::wstring &value); void debug_print(const wchar_t * f, ...); +std::wstring var_to_string(const dmp_variant & var); + template std::wstring join(const InputIterator & begin, const InputIterator & end, @@ -37,5 +39,27 @@ std::wstring join(const v_t & v, const std::wstring & delimiters) { return join(v.begin(), v.end(), delimiters); } -std::wstring var_to_string(const dmp_variant & var); +template +T * safe_next_element_ptr(const std::list & v, typename std::list::iterator & it) { + if (it != v.end()) { + T * p = &(*it); + it++; + return p; + } + + return nullptr; +} + +template +T safe_next_element(const std::list & v, typename std::list::iterator & it) { + if (it != v.end()) { + T p = (*it); + it++; + return p; + } + + return T(); +} + + } From bc37bcc9577917b985c67eaa5a9e4f116ab9fe1c Mon Sep 17 00:00:00 2001 From: stonewell Date: Sat, 27 Apr 2019 23:38:40 -0700 Subject: [PATCH 6/7] all test passed --- cpp/diff_match_patch.cpp | 22 +++++++++++++--------- cpp/diff_match_patch.h | 21 +++++++++++++++++++-- cpp/diff_match_patch_test.cpp | 4 +++- 3 files changed, 35 insertions(+), 12 deletions(-) diff --git a/cpp/diff_match_patch.cpp b/cpp/diff_match_patch.cpp index 28bd8ed..bf0fb01 100644 --- a/cpp/diff_match_patch.cpp +++ b/cpp/diff_match_patch.cpp @@ -146,7 +146,7 @@ std::wstring Patch::toString() { text = std::wstring(L"@@ -") + coords1 + std::wstring(L" +") + coords2 + std::wstring(L" @@\n"); // Escape the body of the patch with %xx notation. 
- for(Diff aDiff : diffs) { + for(const auto & aDiff : diffs) { switch (aDiff.operation) { case INSERT: text += std::wstring(L"+"); @@ -1468,12 +1468,12 @@ int diff_match_patch::match_bitap(const std::wstring &text, const std::wstring & // Highest score beyond which we give up. double score_threshold = Match_Threshold; // Is there a nearby exact match? (speedup) - auto best_loc = text.find(pattern, loc); + int best_loc = text.find(pattern, loc); if (best_loc != std::wstring::npos) { score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), score_threshold); // What about in the other direction? (speedup) - best_loc = text.find_last_of(pattern, loc + pattern.length()); + best_loc = text.rfind(pattern, loc + pattern.length()); if (best_loc != std::wstring::npos) { score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), score_threshold); @@ -1541,7 +1541,10 @@ int diff_match_patch::match_bitap(const std::wstring &text, const std::wstring & best_loc = j - 1; if (best_loc > loc) { // When passing loc, don't exceed our current distance from loc. - start = std::max((std::wstring::size_type)1, 2 * loc - best_loc); + if (2 * loc > best_loc) + start = std::max(1, 2 * loc - best_loc); + else + start = 1; } else { // Already passed loc, downhill from here on in. break; @@ -1596,12 +1599,13 @@ void diff_match_patch::patch_addContext(Patch &patch, const std::wstring &text) if (text.empty()) { return; } + std::wstring pattern = safeMid(text, patch.start2, patch.length1); int padding = 0; // Look for the first and last matches of pattern in text. If two different // matches are found, increase the pattern length. 
- while (text.find(pattern) != text.find_last_of(pattern) + while (text.find(pattern) != text.rfind(pattern) && pattern.length() < Match_MaxBits - Patch_Margin - Patch_Margin) { padding += Patch_Margin; pattern = safeMid(text, std::max(0, patch.start2 - padding), @@ -1614,8 +1618,9 @@ void diff_match_patch::patch_addContext(Patch &patch, const std::wstring &text) // Add the prefix. std::wstring prefix = safeMid(text, std::max(0, patch.start2 - padding), patch.start2 - std::max(0, patch.start2 - padding)); + if (!prefix.empty()) { - patch.diffs.insert(patch.diffs.begin(), Diff(EQUAL, prefix)); + patch.diffs.push_front(Diff(EQUAL, prefix)); } // Add the suffix. std::wstring suffix = safeMid(text, patch.start2 + patch.length1, @@ -1870,9 +1875,7 @@ std::wstring diff_match_patch::patch_addPadding(std::list &patches) { } // Bump all the patches forward. - auto pointer = patches.begin(); - while (pointer != patches.end()) { - Patch &aPatch = *pointer; + for(auto & aPatch : patches) { aPatch.start1 += paddingLength; aPatch.start2 += paddingLength; } @@ -1943,6 +1946,7 @@ void diff_match_patch::patch_splitMax(std::list &patches) { pointer = patches.erase(pointer); start1 = bigpatch.start1; start2 = bigpatch.start2; + precontext = L""; while (!bigpatch.diffs.empty()) { // Create one of several smaller patches. diff --git a/cpp/diff_match_patch.h b/cpp/diff_match_patch.h index 62c9b13..f829137 100644 --- a/cpp/diff_match_patch.h +++ b/cpp/diff_match_patch.h @@ -600,7 +600,7 @@ class diff_match_patch { */ private: static inline std::wstring safeMid(const std::wstring &str, int pos) { - return (pos == str.length()) ? std::wstring(L"") : str.substr(pos); + return safeMid(str, pos, -1); } /** @@ -613,7 +613,24 @@ class diff_match_patch { */ private: static inline std::wstring safeMid(const std::wstring &str, int pos, int len) { - return (pos == str.length()) ? 
std::wstring(L"") : str.substr(pos, len); + if (str.empty() || pos >= str.length()) + return std::wstring(L""); + + if (len < 0) + len = str.length() - pos; + + if (pos < 0) { + len += pos; + pos = 0; + } + + if (len + pos > str.length()) + len = str.length() - pos; + + if (pos == 0 && len == str.length()) + return str; + + return str.substr(pos, len); } }; diff --git a/cpp/diff_match_patch_test.cpp b/cpp/diff_match_patch_test.cpp index f8ba42d..77d9359 100644 --- a/cpp/diff_match_patch_test.cpp +++ b/cpp/diff_match_patch_test.cpp @@ -714,6 +714,8 @@ void diff_match_patch_test::testPatchFromText() { assertEquals(L"patch_fromText: #4.", L"@@ -0,0 +1,3 @@\n+abc\n", dmp.patch_fromText(L"@@ -0,0 +1,3 @@\n+abc\n").begin()->toString()); + assertEquals(L"patch_fromText: #5.", L"@@ -21,4 +21,10 @@\n-jump\n+somersault\n", dmp.patch_fromText(L"@@ -21,4 +21,10 @@\n-jump\n+somersault\n").front().toString()); + // Generates error. try { dmp.patch_fromText(L"Bad\nPatch\n"); @@ -736,7 +738,7 @@ void diff_match_patch_test::testPatchToText() { void diff_match_patch_test::testPatchAddContext() { dmp.Patch_Margin = 4; - Patch & p = dmp.patch_fromText(L"@@ -21,4 +21,10 @@\n-jump\n+somersault\n").front(); + Patch p = dmp.patch_fromText(L"@@ -21,4 +21,10 @@\n-jump\n+somersault\n").front(); dmp.patch_addContext(p, L"The quick brown fox jumps over the lazy dog."); assertEquals(L"patch_addContext: Simple case.", L"@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", p.toString()); From c1339e10b621b0ce9cc566553cc6b04b867a50e4 Mon Sep 17 00:00:00 2001 From: stonewell Date: Sun, 28 Apr 2019 10:50:58 -0700 Subject: [PATCH 7/7] Makefile add dependency generating, change for loop using const auto & to avoid copy --- cpp/.gitignore | 1 + cpp/Makefile | 22 ++++++++++++++++++---- cpp/diff_match_patch.cpp | 28 ++++++++++++++-------------- cpp/diff_match_patch.h | 2 +- 4 files changed, 34 insertions(+), 19 deletions(-) diff --git a/cpp/.gitignore b/cpp/.gitignore index 1be7842..dc0fac0 100644 
--- a/cpp/.gitignore +++ b/cpp/.gitignore @@ -1,3 +1,4 @@ libdiff_match_patch.a objs/ +.d/ test_diff_match_patch diff --git a/cpp/Makefile b/cpp/Makefile index 7f317e2..ec680f7 100644 --- a/cpp/Makefile +++ b/cpp/Makefile @@ -12,12 +12,20 @@ TEST_SRC := diff_match_patch_test.cpp TEST_OBJECTS := $(addprefix $(OBJDIR)/, $(notdir $(patsubst %.cpp,%.o,$(TEST_SRC)))) TEST_EXEC := test_diff_match_patch +DEPDIR := .d +DEPFLAGS = -MT $@ -MMD -MP -MF$(DEPDIR)/$*.Td + +POSTCOMPILE = @mv -f $(DEPDIR)/$*.Td $(DEPDIR)/$*.d && touch $@ + +ALL_SRC := $(SOURCES) $(TEST_SRC) + .PHONY: all clean init all: init $(OUT_LIB) $(TEST_EXEC) -$(OBJDIR)/%.o : %.cpp %.h - $(CXX) $(CFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -o "$@" "$<" +$(OBJDIR)/%.o : %.cpp %.h $(DEPDIR)/%.d + $(CXX) $(DEPFLAGS) $(CFLAGS) $(CXXFLAGS) $(CPPFLAGS) -c -o "$@" "$<" + $(POSTCOMPILE) $(OUT_LIB) : $(OBJECTS) $(AR) -rv "$@" $(OBJECTS) @@ -26,7 +34,13 @@ $(TEST_EXEC) : $(TEST_OBJECTS) $(OUT_LIB) $(CXX) $(LDFLAGS) -o "$@" "$<" $(OUT_LIB) init: - mkdir -p $(OBJDIR) + mkdir -p $(OBJDIR) $(DEPDIR) clean: - $(RM) -r $(OUT_LIB) $(OBJDIR) $(TEST_EXEC) + $(RM) -r $(OUT_LIB) $(OBJDIR) $(TEST_EXEC) $(DEPDIR) + +$(DEPDIR)/%.d: ; +.PRECIOUS: $(DEPDIR)/%.d + + +include $(wildcard $(patsubst %,$(DEPDIR)/%.d,$(basename $(ALL_SRC)))) diff --git a/cpp/diff_match_patch.cpp b/cpp/diff_match_patch.cpp index bf0fb01..ede7e5c 100644 --- a/cpp/diff_match_patch.cpp +++ b/cpp/diff_match_patch.cpp @@ -124,7 +124,7 @@ bool Patch::isNull() const { * Indices are printed as 1-based, not 0-based. 
* @return The GNU diff string */ -std::wstring Patch::toString() { +std::wstring Patch::toString() const { std::wstring coords1, coords2; if (length1 == 0) { coords1 = std::to_wstring(start1) + std::wstring(L",0"); @@ -357,7 +357,7 @@ std::list diff_match_patch::diff_lineMode(std::wstring text1, std::wstring std::advance(p2, -1 * (count_delete + count_insert)); diffs.erase(p2, pointer); - for(Diff newDiff: + for(auto const & newDiff: diff_main(text_delete, text_insert, false, deadline)) { pointer = std::next(diffs.insert(pointer, newDiff)); } @@ -1257,7 +1257,7 @@ int diff_match_patch::diff_xIndex(const std::list &diffs, int loc) { int last_chars1 = 0; int last_chars2 = 0; Diff lastDiff; - for(Diff aDiff: diffs) { + for(const auto & aDiff: diffs) { if (aDiff.operation != INSERT) { // Equality or deletion. chars1 += aDiff.text.length(); @@ -1286,7 +1286,7 @@ int diff_match_patch::diff_xIndex(const std::list &diffs, int loc) { std::wstring diff_match_patch::diff_prettyHtml(const std::list &diffs) { std::wstring html; std::wstring text; - for(Diff aDiff : diffs) { + for(const auto & aDiff : diffs) { text = aDiff.text; std::replace_all(text, L"&", L"&"); std::replace_all(text, L"<", L"<"); @@ -1312,7 +1312,7 @@ std::wstring diff_match_patch::diff_prettyHtml(const std::list &diffs) { std::wstring diff_match_patch::diff_text1(const std::list &diffs) { std::wstring text; - for(Diff aDiff: diffs) { + for(const auto & aDiff: diffs) { if (aDiff.operation != INSERT) { text += aDiff.text; } @@ -1323,7 +1323,7 @@ std::wstring diff_match_patch::diff_text1(const std::list &diffs) { std::wstring diff_match_patch::diff_text2(const std::list &diffs) { std::wstring text; - for(Diff aDiff : diffs) { + for(const auto & aDiff : diffs) { if (aDiff.operation != DELETE) { text += aDiff.text; } @@ -1336,7 +1336,7 @@ int diff_match_patch::diff_levenshtein(const std::list &diffs) { int levenshtein = 0; int insertions = 0; int deletions = 0; - for(Diff aDiff : diffs) { + for(const auto & aDiff 
: diffs) { switch (aDiff.operation) { case INSERT: insertions += aDiff.text.length(); @@ -1391,7 +1391,7 @@ std::list diff_match_patch::diff_fromDelta(const std::wstring &text1, std::list diffs; int pointer = 0; // Cursor in text1 std::wstring_list tokens = std::split(delta, '\t'); - for(std::wstring token: tokens) { + for(const auto & token: tokens) { if (token.empty()) { // Blank tokens are ok (from a trailing \t). continue; @@ -1683,7 +1683,7 @@ std::list diff_match_patch::patch_make(const std::wstring &text1, // context info. std::wstring prepatch_text = text1; std::wstring postpatch_text = text1; - for(Diff aDiff : diffs) { + for(const auto & aDiff : diffs) { if (patch.diffs.empty() && aDiff.operation != EQUAL) { // A new patch starts here. patch.start1 = char_count1; @@ -1749,9 +1749,9 @@ std::list diff_match_patch::patch_make(const std::wstring &text1, std::list diff_match_patch::patch_deepCopy(std::list &patches) { std::list patchesCopy; - for(Patch aPatch: patches) { + for(const auto & aPatch: patches) { Patch patchCopy = Patch(); - for(Diff aDiff: aPatch.diffs) { + for(const auto & aDiff: aPatch.diffs) { Diff diffCopy = Diff(aDiff.operation, aDiff.text); patchCopy.diffs.push_back(diffCopy); } @@ -1786,7 +1786,7 @@ std::pair > diff_match_patch::patch_apply( // has an effective expected position of 22. 
int delta = 0; std::vector results(patchesCopy.size()); - for(Patch aPatch: patchesCopy) { + for(const auto & aPatch: patchesCopy) { int expected_loc = aPatch.start2 + delta; std::wstring text1 = diff_text1(aPatch.diffs); int start_loc; @@ -1837,7 +1837,7 @@ std::pair > diff_match_patch::patch_apply( } else { diff_cleanupSemanticLossless(diffs); int index1 = 0; - for(Diff aDiff: aPatch.diffs) { + for(const auto & aDiff: aPatch.diffs) { if (aDiff.operation != EQUAL) { int index2 = diff_xIndex(diffs, index1); if (aDiff.operation == INSERT) { @@ -2030,7 +2030,7 @@ void diff_match_patch::patch_splitMax(std::list &patches) { std::wstring diff_match_patch::patch_toText(const std::list &patches) { std::wstring text; - for(Patch aPatch: patches) { + for(const auto & aPatch: patches) { text.append(aPatch.toString()); } return text; diff --git a/cpp/diff_match_patch.h b/cpp/diff_match_patch.h index f829137..c6903b4 100644 --- a/cpp/diff_match_patch.h +++ b/cpp/diff_match_patch.h @@ -105,7 +105,7 @@ class Patch { */ Patch(); bool isNull() const; - std::wstring toString(); + std::wstring toString() const; };