diff --git a/AUTHORS b/AUTHORS
index c82809e..90c65e9 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -7,4 +7,4 @@ Duncan Cross (Lua port)
 Jan Weiß (Objective C port)
 Matthaeus G. Chajdas (C# port)
 Mike Slemmer (C++ port)
-
+Scott Aron Bloom (C++11 port-remove Qt dependency)
diff --git a/cpp17/CMakeLists.txt b/cpp17/CMakeLists.txt
new file mode 100644
index 0000000..27439fe
--- /dev/null
+++ b/cpp17/CMakeLists.txt
@@ -0,0 +1,35 @@
+cmake_minimum_required(VERSION 3.22)
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED YES)
+
+
+SET( _PROJECT_NAME diff_match_patch_cpp17 )
+project( ${_PROJECT_NAME} )
+# Static library holding the diff/match/patch implementation.
+add_library(${_PROJECT_NAME}
+    STATIC
+    diff_match_patch.cpp
+    diff_match_patch.h
+    diff_match_patch_utils.cpp
+    diff_match_patch_utils.h
+)
+
+target_include_directories( ${_PROJECT_NAME} PUBLIC ${CMAKE_SOURCE_DIR} )
+target_link_libraries( ${_PROJECT_NAME} )
+
+# Test driver; GoogleTest is linked only when USE_GTEST is set.
+SET( TEST_NAME "${_PROJECT_NAME}_test" )
+project( ${TEST_NAME} )
+add_executable( ${TEST_NAME} diff_match_patch_test.cpp diff_match_patch_test.h diff_match_patch_test_assertEquals.cpp)
+
+target_include_directories( ${TEST_NAME} PUBLIC ${CMAKE_SOURCE_DIR})
+if( USE_GTEST )
+    SET( GTEST_LIBS gtest gmock )
+    target_include_directories( ${_PROJECT_NAME} PUBLIC ${GOOGLETEST_ROOT_DIR}/googletest/include )
+    target_compile_definitions( ${_PROJECT_NAME} PUBLIC USE_GTEST )
+    target_compile_definitions( ${TEST_NAME} PUBLIC USE_GTEST )
+endif()
+
+# Link through ${TEST_NAME} so the target name is spelled in one place only.
+target_link_libraries( ${TEST_NAME} ${_PROJECT_NAME} ${GTEST_LIBS})
+add_test( ${TEST_NAME} ${TEST_NAME} )
diff --git a/cpp17/diff_match_patch.cpp b/cpp17/diff_match_patch.cpp
new file mode 100644
index 0000000..3cbfcc3
--- /dev/null
+++ b/cpp17/diff_match_patch.cpp
@@ -0,0 +1,2200 @@
+/*
+ * Diff Match and Patch
+ * Copyright 2018 The diff-match-patch Authors.
+ * https://github.com/google/diff-match-patch
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "diff_match_patch.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "diff_match_patch_utils.h" + +#ifdef WIN32 +std::size_t kZERO{0ULL}; +std::size_t kONE{1ULL}; +#else +unsigned long kZERO{0UL}; +unsigned long kONE{1UL}; +#endif + +////////////////////////// +// +// Diff Class +// +////////////////////////// + +/** + * Constructor. Initializes the diff with the provided values. + * @param operation One of INSERT, DELETE or EQUAL + * @param text The text being applied + */ +Diff::Diff(Operation _operation, const std::wstring &_text) + : operation(_operation), text(_text) { + // Construct a diff with the specified operation and text. +} + +Diff::Diff() {} + +Diff::Diff(Operation _operation, const wchar_t *_text) + : Diff(_operation, (_text ? std::wstring(_text) : std::wstring(L""))) {} + +Diff::Diff(Operation _operation, const std::string &_text) + : Diff(_operation, NUtils::to_wstring(_text)) {} + +Diff::Diff(Operation _operation, const char *_text) + : Diff(_operation, std::string(_text)) {} + +std::wstring Diff::strOperation(Operation op) { + switch (op) { + case INSERT: + return L"INSERT"; + case DELETE: + return L"DELETE"; + case EQUAL: + return L"EQUAL"; + } + throw "Invalid operation."; +} + +/** + * Display a human-readable version of this Diff. + * @return text version + */ +std::wstring Diff::toString() const { + std::wstring prettyText = text; + // Replace linebreaks with Pilcrow signs. 
+  std::replace(prettyText.begin(), prettyText.end(), L'\n', L'\u00b6');
+  return std::wstring(L"Diff(") + strOperation(operation) +
+         std::wstring(L",\"") + prettyText + std::wstring(L"\")");
+}
+
+/**
+ * Is this Diff equivalent to another Diff?
+ * @param d Another Diff to compare against
+ * @return true when both the operation and the text match
+ */
+bool Diff::operator==(const Diff &d) const {
+  const bool sameOperation = (d.operation == operation);
+  return sameOperation && (d.text == text);
+}
+
+/**
+ * Negation of operator==.
+ */
+bool Diff::operator!=(const Diff &d) const { return !(*this == d); }
+
+/////////////////////////////////////////////
+//
+// Patch Class
+//
+/////////////////////////////////////////////
+
+/**
+ * Constructor.  Initializes with an empty list of diffs.
+ */
+Patch::Patch() {}
+
+/**
+ * Constructor.  Parses a "@@ -a,b +c,d @@" header into start1/length1 and
+ * start2/length2.
+ * @param text The header line; its first character is erased on success.
+ * @throws std::wstring when text is not a valid patch header
+ */
+Patch::Patch(std::wstring &text) {
+  const std::wregex headerPattern(LR"(^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$)");
+  std::wsmatch groups;
+  if (!std::regex_match(text, groups, headerPattern) || (groups.size() != 5)) {
+    throw std::wstring(L"Invalid patch string: " + text);
+  }
+
+  // Both coordinate pairs follow the same rules: a missing length means one
+  // line, a length of "0" keeps the start as written, and in every other
+  // case the start is decremented by one.
+  const auto decode = [](const std::wssub_match &startGroup,
+                         const std::wssub_match &lengthGroup, auto &start,
+                         auto &length) {
+    start = NUtils::toInt(startGroup.str());
+    if (lengthGroup.length() == 0) {
+      start--;
+      length = 1;
+    } else if (lengthGroup.str() == L"0") {
+      length = 0;
+    } else {
+      start--;
+      length = NUtils::toInt(lengthGroup.str());
+    }
+  };
+  decode(groups[1], groups[2], start1, length1);
+  decode(groups[3], groups[4], start2, length2);
+
+  // NOTE(review): this drops the first character of the caller's string;
+  // confirm that callers rely on it.
+  text.erase(text.begin());
+}
+
+/**
+ * @return true when all four coordinates are zero and there are no diffs
+ */
+bool Patch::isNull() const {
+  return start1 == 0 && start2 == 0 && length1 == 0 && length2 == 0 &&
+         diffs.empty();
+}
+
+/**
+ * Emulate GNU diff's format.
+ * Header: @@ -382,8 +481,9 @@
+ * Indices are printed as 1-based, not 0-based.
+ * @return The GNU diff string
+ */
+std::wstring Patch::toString() const {
+  auto text = getPatchHeader();
+  // Escape the body of the patch with %xx notation.
+  for (auto &&aDiff : diffs) {
+    switch (aDiff.operation) {
+      case INSERT:
+        text += L"+";
+        break;
+      case DELETE:
+        text += L"-";
+        break;
+      case EQUAL:
+        text += L" ";
+        break;
+    }
+    text += NUtils::toPercentEncoding(aDiff.text, L" !~*'();/?:@&=+$,#") +
+            std::wstring(L"\n");
+  }
+
+  return text;
+}
+
+/**
+ * Build the "@@ -a,b +c,d @@\n" header line from the patch coordinates.
+ */
+std::wstring Patch::getPatchHeader() const {
+  auto coords1 = getCoordinateString(start1, length1);
+  auto coords2 = getCoordinateString(start2, length2);
+  auto text = std::wstring(L"@@ -") + coords1 + std::wstring(L" +") + coords2 +
+              std::wstring(L" @@\n");
+  return text;
+}
+
+/**
+ * Format one coordinate pair for the header.
+ * @param start 0-based start index
+ * @param length Number of lines
+ * @return "start,0" for an empty range, "start+1" for a single line,
+ *         otherwise "start+1,length"
+ */
+std::wstring Patch::getCoordinateString(std::size_t start,
+                                        std::size_t length) const {
+  std::wstring retVal;
+  if (length == 0) {
+    retVal = std::to_wstring(start) + std::wstring(L",0");
+  } else if (length == 1) {
+    retVal = std::to_wstring(start + 1);
+  } else {
+    retVal = std::to_wstring(start + 1) + std::wstring(L",") +
+             std::to_wstring(length);
+  }
+  return retVal;
+}
+
+/////////////////////////////////////////////
+//
+// diff_match_patch Class
+//
+/////////////////////////////////////////////
+
+// all class members initialized in the class
+diff_match_patch::diff_match_patch() {}
+
+/**
+ * Diff two texts with the line-mode speedup enabled.
+ */
+TDiffVector diff_match_patch::diff_main(const std::wstring &text1,
+                                        const std::wstring &text2) {
+  return diff_main(text1, text2, true);
+}
+
+/**
+ * Diff two texts, deriving the deadline from Diff_Timeout.
+ * @param checklines Passed through to diff_compute to allow line mode
+ */
+TDiffVector diff_match_patch::diff_main(const std::wstring &text1,
+                                        const std::wstring &text2,
+                                        bool checklines) {
+  // Set a deadline by which time the diff must be complete.
+  clock_t deadline;
+  if (Diff_Timeout <= 0) {
+    // No timeout configured: use the largest representable clock value.
+    deadline = std::numeric_limits<clock_t>::max();
+  } else {
+    deadline = clock() + static_cast<clock_t>(Diff_Timeout * CLOCKS_PER_SEC);
+  }
+  return diff_main(text1, text2, checklines, deadline);
+}
+
+/**
+ * Diff two texts against an explicit deadline.
+ * Handles the trivial cases, strips the common prefix and suffix, diffs the
+ * remaining middle with diff_compute and merges the result.
+ * @param deadline clock() value after which diff_bisect stops iterating
+ */
+TDiffVector diff_match_patch::diff_main(const std::wstring &text1,
+                                        const std::wstring &text2,
+                                        bool checklines, clock_t deadline) {
+  // Check for equality (speedup).
+  TDiffVector diffs;
+  if (text1 == text2) {
+    if (!text1.empty()) {
+      diffs.emplace_back(EQUAL, text1);
+    }
+    return diffs;
+  }
+
+  if (!text1.empty() && text2.empty()) {
+    diffs.emplace_back(DELETE, text1);
+    return diffs;
+  }
+
+  if (text1.empty() && !text2.empty()) {
+    diffs.emplace_back(INSERT, text2);
+    return diffs;
+  }
+
+  // Trim off common prefix (speedup).
+  auto commonlength = diff_commonPrefix(text1, text2);
+  auto commonprefix = text1.substr(0, commonlength);
+  auto textChopped1 = text1.substr(commonlength);
+  auto textChopped2 = text2.substr(commonlength);
+
+  // Trim off common suffix (speedup).
+  commonlength = diff_commonSuffix(textChopped1, textChopped2);
+  auto commonsuffix = textChopped1.substr(textChopped1.length() - commonlength);
+  textChopped1 = textChopped1.substr(0, textChopped1.length() - commonlength);
+  textChopped2 = textChopped2.substr(0, textChopped2.length() - commonlength);
+
+  // Compute the diff on the middle block.
+  diffs = diff_compute(textChopped1, textChopped2, checklines, deadline);
+
+  // Restore the prefix and suffix.
+  if (!commonprefix.empty()) {
+    diffs.emplace(diffs.begin(), EQUAL, commonprefix);
+  }
+  if (!commonsuffix.empty()) {
+    diffs.emplace_back(EQUAL, commonsuffix);
+  }
+
+  diff_cleanupMerge(diffs);
+
+  return diffs;
+}
+
+// Narrow-string overloads: convert with NUtils::to_wstring and forward.
+TDiffVector diff_match_patch::diff_main(const std::string &text1,
+                                        const std::string &text2) {
+  return diff_main(NUtils::to_wstring(text1), NUtils::to_wstring(text2));
+}
+
+TDiffVector diff_match_patch::diff_main(const std::string &text1,
+                                        const std::string &text2,
+                                        bool checklines) {
+  return diff_main(NUtils::to_wstring(text1), NUtils::to_wstring(text2),
+                   checklines);
+}
+
+TDiffVector diff_match_patch::diff_main(const std::string &text1,
+                                        const std::string &text2,
+                                        bool checklines, clock_t deadline) {
+  return diff_main(NUtils::to_wstring(text1), NUtils::to_wstring(text2),
+                   checklines, deadline);
+}
+
+TDiffVector diff_match_patch::diff_compute(const std::wstring &text1,
+                                           const std::wstring &text2,
+                                           bool checklines, clock_t deadline) {
+  TDiffVector diffs;
+
+  if (text1.empty()) {
+    // Just add some text (speedup).
+    diffs.emplace_back(INSERT, text2);
+    return diffs;
+  }
+
+  if (text2.empty()) {
+    // Just delete some text (speedup).
+    diffs.emplace_back(DELETE, text1);
+    return diffs;
+  }
+
+  {
+    // Name the longer and shorter input by reference; no copies are needed.
+    const bool text1IsLonger = text1.length() > text2.length();
+    const std::wstring &longtext = text1IsLonger ? text1 : text2;
+    const std::wstring &shorttext = text1IsLonger ? text2 : text1;
+    auto i = longtext.find(shorttext);
+    if (i != std::string::npos) {
+      // Shorter text is inside the longer text (speedup).
+      const Operation op = text1IsLonger ? DELETE : INSERT;
+      diffs.emplace_back(op, longtext.substr(0, i));
+      diffs.emplace_back(EQUAL, shorttext);
+      diffs.emplace_back(op, safeMid(longtext, i + shorttext.length()));
+      return diffs;
+    }
+
+    if (shorttext.length() == 1) {
+      // Single character string.
+      // After the previous speedup, the character can't be an equality.
+ diffs.emplace_back(DELETE, text1); + diffs.emplace_back(INSERT, text2); + return diffs; + } + // Garbage collect longtext and shorttext by scoping out. + } + + // Check to see if the problem can be split in two. + const TStringVector hm = diff_halfMatch(text1, text2); + if (!hm.empty()) { + // A half-match was found, sort out the return data. + auto &&text1_a = hm[0]; + auto &&text1_b = hm[1]; + auto &&text2_a = hm[2]; + auto &&text2_b = hm[3]; + auto &&mid_common = hm[4]; + // Send both pairs off for separate processing. + diffs = diff_main(text1_a, text2_a, checklines, deadline); + const TDiffVector diffs_b = + diff_main(text1_b, text2_b, checklines, deadline); + // Merge the results. + diffs.emplace_back(EQUAL, mid_common); + diffs.insert(diffs.end(), diffs_b.begin(), diffs_b.end()); + return diffs; + } + + // Perform a real diff. + if (checklines && (text1.length() > 100) && (text2.length() > 100)) { + return diff_lineMode(text1, text2, deadline); + } + + return diff_bisect(text1, text2, deadline); +} + +TDiffVector diff_match_patch::diff_compute(const std::string &text1, + const std::string &text2, + bool checklines, clock_t deadline) { + return diff_compute(NUtils::to_wstring(text1), NUtils::to_wstring(text2), + checklines, deadline); +} + +TDiffVector diff_match_patch::diff_lineMode(std::wstring text1, + std::wstring text2, + clock_t deadline) { + // Scan the text on a line-by-line basis first. + auto a = diff_linesToChars(text1, text2); + text1 = std::get(a[0]); + text2 = std::get(a[1]); + auto linearray = std::get(a[2]); + + auto diffs = diff_main(text1, text2, false, deadline); + + // Convert the diff back to original text. + diff_charsToLines(diffs, linearray); + // Eliminate freak matches (e.g. blank lines) + diff_cleanupSemantic(diffs); + + // Rediff any replacement blocks, this time character-by-character. + // Add a dummy entry at the end. 
+ diffs.emplace_back(EQUAL, L""); + std::size_t pointer = 0; + int count_delete = 0; + int count_insert = 0; + std::wstring text_delete; + std::wstring text_insert; + while (pointer < diffs.size()) { + switch (diffs[pointer].operation) { + case INSERT: + count_insert++; + text_insert += diffs[pointer].text; + break; + case DELETE: + count_delete++; + text_delete += diffs[pointer].text; + break; + case EQUAL: + // Upon reaching an equality, check for prior redundancies. + if (count_delete >= 1 && count_insert >= 1) { + // Delete the offending records and add the merged ones. + auto numElements = count_delete + count_insert; + auto start = diffs.begin() + pointer - numElements; + auto end = start + numElements; + diffs.erase(start, end); + pointer = pointer - count_delete - count_insert; + auto subDiff = diff_main(text_delete, text_insert, false, deadline); + diffs.insert(diffs.begin() + pointer, subDiff.begin(), subDiff.end()); + pointer = pointer + subDiff.size(); + } + count_insert = 0; + count_delete = 0; + text_delete.clear(); + text_insert.clear(); + break; + } + pointer++; + } + diffs.pop_back(); // Remove the dummy entry at the end. + + return diffs; +} + +TDiffVector diff_match_patch::diff_lineMode(std::string text1, + std::string text2, + clock_t deadline) { + return diff_lineMode(NUtils::to_wstring(text1), NUtils::to_wstring(text2), + deadline); +} + +// using int64_t rather thant size_t due to the backward walking nature of the +// algorithm +TDiffVector diff_match_patch::diff_bisect(const std::wstring &text1, + const std::wstring &text2, + clock_t deadline) { + // Cache the text lengths to prevent multiple calls. 
+ auto text1_length = static_cast(text1.length()); + auto text2_length = static_cast(text2.length()); + auto max_d = (text1_length + text2_length + 1) / 2; + auto v_offset = max_d; + auto v_length = 2 * max_d; + auto v1 = std::vector(v_length, -1); + auto v2 = std::vector(v_length, -1); + v1[v_offset + 1] = 0; + v2[v_offset + 1] = 0; + auto delta = text1_length - text2_length; + // If the total number of characters is odd, then the front path will + // collide with the reverse path. + bool front = (delta % 2 != 0); + // Offsets for start and end of k loop. + // Prevents mapping of space beyond the grid. + int64_t k1start = 0; + int64_t k1end = 0; + int64_t k2start = 0; + int64_t k2end = 0; + for (int64_t d = 0; d < max_d; d++) { + // Bail out if deadline is reached. + if (clock() > deadline) { + break; + } + + // Walk the front path one step. + for (auto k1 = -d + k1start; k1 <= d - k1end; k1 += 2) { + auto k1_offset = v_offset + k1; + int64_t x1; + if ((k1 == -d) || (k1 != d) && (v1[k1_offset - 1] < v1[k1_offset + 1])) { + x1 = v1[k1_offset + 1]; + } else { + x1 = v1[k1_offset - 1] + 1; + } + int64_t y1 = x1 - k1; + while ((x1 < text1_length) && (y1 < text2_length) && + (text1[x1] == text2[y1])) { + x1++; + y1++; + } + v1[k1_offset] = x1; + if (x1 > text1_length) { + // Ran off the right of the graph. + k1end += 2; + } else if (y1 > text2_length) { + // Ran off the bottom of the graph. + k1start += 2; + } else if (front) { + auto k2_offset = v_offset + delta - k1; + if ((k2_offset >= 0) && (k2_offset < v_length) && + (v2[k2_offset] != -1)) { + // Mirror x2 onto top-left coordinate system. + auto x2 = text1_length - v2[k2_offset]; + if (x1 >= x2) { + // Overlap detected. + return diff_bisectSplit(text1, text2, x1, y1, deadline); + } + } + } + } + + // Walk the reverse path one step. 
+ for (auto k2 = -d + k2start; k2 <= d - k2end; k2 += 2) { + auto k2_offset = v_offset + k2; + int64_t x2; + if ((k2 == -d) || (k2 != d) && (v2[k2_offset - 1] < v2[k2_offset + 1])) { + x2 = v2[k2_offset + 1]; + } else { + x2 = v2[k2_offset - 1] + 1; + } + auto y2 = x2 - k2; + while ((x2 < text1_length) && (y2 < text2_length) && + (text1[text1_length - x2 - 1] == text2[text2_length - y2 - 1])) { + x2++; + y2++; + } + v2[k2_offset] = x2; + if (x2 > text1_length) { + // Ran off the left of the graph. + k2end += 2; + } else if (y2 > text2_length) { + // Ran off the top of the graph. + k2start += 2; + } else if (!front) { + auto k1_offset = v_offset + delta - k2; + if ((k1_offset >= 0) && (k1_offset < v_length) && + (v1[k1_offset] != -1)) { + auto x1 = v1[k1_offset]; + auto y1 = v_offset + x1 - k1_offset; + // Mirror x2 onto top-left coordinate system. + x2 = text1_length - v2[k2_offset]; + if (x1 >= x2) { + // Overlap detected. + return diff_bisectSplit(text1, text2, x1, y1, deadline); + } + } + } + } + } + // Diff took too long and hit the deadline or + // number of diffs equals number of characters, no commonality at all. + auto diffs = TDiffVector({Diff(DELETE, text1), Diff(INSERT, text2)}); + return diffs; +} + +TDiffVector diff_match_patch::diff_bisect(const std::string &text1, + const std::string &text2, + clock_t deadline) { + return diff_bisect(NUtils::to_wstring(text1), NUtils::to_wstring(text2), + deadline); +} + +TDiffVector diff_match_patch::diff_bisectSplit(const std::wstring &text1, + const std::wstring &text2, + std::size_t x, std::size_t y, + clock_t deadline) { + auto text1a = text1.substr(0, x); + auto text2a = text2.substr(0, y); + auto text1b = safeMid(text1, x); + auto text2b = safeMid(text2, y); + + // Compute both diffs serially. 
+ TDiffVector diffs = diff_main(text1a, text2a, false, deadline); + TDiffVector diffsb = diff_main(text1b, text2b, false, deadline); + + diffs.insert(diffs.end(), diffsb.begin(), diffsb.end()); + return diffs; +} + +TDiffVector diff_match_patch::diff_bisectSplit(const std::string &text1, + const std::string &text2, + std::size_t x, std::size_t y, + clock_t deadline) { + return diff_bisectSplit(NUtils::to_wstring(text1), NUtils::to_wstring(text2), + x, y, deadline); +} + +diff_match_patch::TVariantVector diff_match_patch::diff_linesToChars( + const std::wstring &text1, const std::wstring &text2) { + TStringVector lineArray; + std::unordered_map lineHash; + // e.g. linearray[4] == "Hello\n" + // e.g. linehash.get("Hello\n") == 4 + + // "\x00" is a valid character, but various debuggers don't like it. + // So we'll insert a junk entry to avoid generating a nullptr character. + lineArray.emplace_back(L""); + + const std::wstring chars1 = + diff_linesToCharsMunge(text1, lineArray, lineHash); + const std::wstring chars2 = + diff_linesToCharsMunge(text2, lineArray, lineHash); + + TVariantVector listRet; + listRet.emplace_back(chars1); + listRet.emplace_back(chars2); + listRet.emplace_back(lineArray); + return listRet; +} + +std::vector +diff_match_patch::diff_linesToChars(const std::string &text1, + const std::string &text2) { + return diff_linesToChars(NUtils::to_wstring(text1), + NUtils::to_wstring(text2)); +} + +std::wstring diff_match_patch::diff_linesToCharsMunge( + const std::wstring &text, TStringVector &lineArray, + std::unordered_map &lineHash) { + std::size_t lineStart = 0; + std::size_t lineEnd = std::string::npos; + std::wstring line; + std::wstring chars; + // Walk the text, pulling out a substring for each line. + // text.split('\n') would would temporarily double our memory footprint. + // Modifying text would create many large strings to garbage collect. 
+ bool firstTime = true; + while ((firstTime && (lineEnd == -1) && !text.empty()) || + lineEnd < (text.length() - 1)) { + firstTime = false; + lineEnd = text.find('\n', lineStart); + if (lineEnd == -1) { + lineEnd = text.length() - 1; + } + line = safeMid(text, lineStart, lineEnd + 1 - lineStart); + + auto pos = lineHash.find(line); + if (pos != lineHash.end()) { + chars += static_cast((*pos).second); + } else { + lineArray.emplace_back(line); + lineHash[line] = lineArray.size() - 1; + chars += static_cast(lineArray.size() - 1); + } + + lineStart = lineEnd + 1; + } + return chars; +} + +void diff_match_patch::diff_charsToLines(TDiffVector &diffs, + const TStringVector &lineArray) { + // Qt has no mutable Qforeach construct. + for (auto &&diff : diffs) { + std::wstring text; + for (auto &&y : diff.text) { + text += lineArray[y]; + } + diff.text = text; + } +} + +std::size_t diff_match_patch::diff_commonPrefix(const std::wstring &text1, + const std::wstring &text2) { + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const auto n = std::min(text1.length(), text2.length()); + for (std::size_t i = 0; i < n; i++) { + if (text1[i] != text2[i]) { + return i; + } + } + return n; +} + +std::size_t diff_match_patch::diff_commonPrefix(const std::string &text1, + const std::string &text2) { + return diff_commonPrefix(NUtils::to_wstring(text1), + NUtils::to_wstring(text2)); +} + +std::size_t diff_match_patch::diff_commonSuffix(const std::wstring &text1, + const std::wstring &text2) { + // Performance analysis: http://neil.fraser.name/news/2007/10/09/ + const auto text1_length = text1.length(); + const auto text2_length = text2.length(); + const auto n = std::min(text1_length, text2_length); + for (std::size_t i = 1; i <= n; i++) { + if (text1[text1_length - i] != text2[text2_length - i]) { + return i - 1; + } + } + return n; +} + +std::size_t diff_match_patch::diff_commonSuffix(const std::string &text1, + const std::string &text2) { + return 
diff_commonSuffix(NUtils::to_wstring(text1), + NUtils::to_wstring(text2)); +} + +std::size_t diff_match_patch::diff_commonOverlap(const std::wstring &text1, + const std::wstring &text2) { + // Cache the text lengths to prevent multiple calls. + const auto text1_length = text1.length(); + const auto text2_length = text2.length(); + // Eliminate the nullptr case. + if (text1_length == 0 || text2_length == 0) { + return 0; + } + // Truncate the longer string. + std::wstring text1_trunc = text1; + std::wstring text2_trunc = text2; + if (text1_length > text2_length) { + text1_trunc = text1.substr(text1_length - text2_length); + } else if (text1_length < text2_length) { + text2_trunc = text2.substr(0, text1_length); + } + const auto text_length = std::min(text1_length, text2_length); + // Quick check for the worst case. + if (text1_trunc == text2_trunc) { + return text_length; + } + + // Start by looking for a single character match + // and increase length until no match is found. + // Performance analysis: http://neil.fraser.name/news/2010/11/04/ + std::size_t best = 0; + std::size_t length = 1; + while (true) { + std::wstring pattern = (length < text1_trunc.length()) + ? text1_trunc.substr(text_length - length) + : std::wstring(); + if (pattern.empty()) return best; + + auto found = text2_trunc.find(pattern); + if (found == std::string::npos) { + return best; + } + length += found; + if (found == 0 || text1_trunc.substr(text_length - length) == + text2_trunc.substr(0, length)) { + best = length; + length++; + } + } +} + +std::size_t diff_match_patch::diff_commonOverlap(const std::string &text1, + const std::string &text2) { + return diff_commonOverlap(NUtils::to_wstring(text1), + NUtils::to_wstring(text2)); +} + +diff_match_patch::TStringVector diff_match_patch::diff_halfMatch( + const std::wstring &text1, const std::wstring &text2) { + if (Diff_Timeout <= 0) { + // Don't risk returning a non-optimal diff if we have unlimited time. 
+ return {}; + } + const std::wstring longtext = text1.length() > text2.length() ? text1 : text2; + const std::wstring shorttext = + text1.length() > text2.length() ? text2 : text1; + if (longtext.length() < 4 || shorttext.length() * 2 < longtext.length()) { + return {}; // Pointless. + } + + // First check if the second quarter is the seed for a half-match. + const TStringVector hm1 = + diff_halfMatchI(longtext, shorttext, (longtext.length() + 3) / 4); + // Check again based on the third quarter. + const TStringVector hm2 = + diff_halfMatchI(longtext, shorttext, (longtext.length() + 1) / 2); + TStringVector hm; + if (hm1.empty() && hm2.empty()) { + return {}; + } else if (hm2.empty()) { + hm = hm1; + } else if (hm1.empty()) { + hm = hm2; + } else { + // Both matched. Select the longest. + hm = hm1[4].length() > hm2[4].length() ? hm1 : hm2; + } + + // A half-match was found, sort out the return data. + if (text1.length() > text2.length()) { + return hm; + } else { + TStringVector listRet({hm[2], hm[3], hm[0], hm[1], hm[4]}); + return listRet; + } +} + +diff_match_patch::TStringVector diff_match_patch::diff_halfMatch( + const std::string &text1, const std::string &text2) { + return diff_halfMatch(NUtils::to_wstring(text1), NUtils::to_wstring(text2)); +} + +diff_match_patch::TStringVector diff_match_patch::diff_halfMatchI( + const std::wstring &longtext, const std::wstring &shorttext, + std::size_t i) { + // Start with a 1/4 length substring at position i as a seed. 
+ const std::wstring seed = safeMid(longtext, i, longtext.length() / 4); + std::size_t j = std::string::npos; + std::wstring best_common; + std::wstring best_longtext_a, best_longtext_b; + std::wstring best_shorttext_a, best_shorttext_b; + while ((j = shorttext.find(seed, j + 1)) != std::string::npos) { + const auto prefixLength = + diff_commonPrefix(safeMid(longtext, i), safeMid(shorttext, j)); + const auto suffixLength = + diff_commonSuffix(longtext.substr(0, i), shorttext.substr(0, j)); + if (best_common.length() < suffixLength + prefixLength) { + best_common = safeMid(shorttext, j - suffixLength, suffixLength) + + safeMid(shorttext, j, prefixLength); + best_longtext_a = longtext.substr(0, i - suffixLength); + best_longtext_b = safeMid(longtext, i + prefixLength); + best_shorttext_a = shorttext.substr(0, j - suffixLength); + best_shorttext_b = safeMid(shorttext, j + prefixLength); + } + } + if (best_common.length() * 2 >= longtext.length()) { + TStringVector listRet({best_longtext_a, best_longtext_b, best_shorttext_a, + best_shorttext_b, best_common}); + return listRet; + } else { + return {}; + } +} + +diff_match_patch::TStringVector diff_match_patch::diff_halfMatchI( + const std::string &longtext, const std::string &shorttext, std::size_t i) { + return diff_halfMatchI(NUtils::to_wstring(longtext), + NUtils::to_wstring(shorttext), i); +} + +void diff_match_patch::diff_cleanupSemantic(TDiffVector &diffs) { + if (diffs.empty()) return; + + bool changes = false; + // Stack of indices where equalities are found. + std::stack equalities; // stack of equalities + // Always equal to equalities[equalitiesLength-1][1] + std::wstring lastEquality; + std::size_t pointer = 0; // Index of current position. + // Number of characters that changed prior to the equality. + std::size_t length_insertions1 = 0; + std::size_t length_deletions1 = 0; + // Number of characters that changed after the equality. 
+ std::size_t length_insertions2 = 0; + std::size_t length_deletions2 = 0; + while (pointer < diffs.size()) { + if (diffs[pointer].operation == EQUAL) { // Equality found. + equalities.push(pointer); + length_insertions1 = length_insertions2; + length_deletions1 = length_deletions2; + length_insertions2 = 0; + length_deletions2 = 0; + lastEquality = diffs[pointer].text; + } else { // an insertion or deletion + if (diffs[pointer].operation == INSERT) { + length_insertions2 += diffs[pointer].text.length(); + } else { + length_deletions2 += diffs[pointer].text.length(); + } + // Eliminate an equality that is smaller or equal to the edits on both + // sides of it. + if (!lastEquality.empty() && + (lastEquality.length() <= + std::max(length_insertions1, length_deletions1)) && + (lastEquality.length() <= + std::max(length_insertions2, length_deletions2))) { + // Duplicate record. + diffs.insert(diffs.begin() + equalities.top(), + Diff(DELETE, lastEquality)); + // Change second copy to insert. + diffs[equalities.top() + 1].operation = INSERT; + // Throw away the equality we just deleted. + equalities.pop(); + if (!equalities.empty()) { + equalities.pop(); + } + pointer = !equalities.empty() ? equalities.top() : -1; + length_insertions1 = 0; // Reset the counters. + length_deletions1 = 0; + length_insertions2 = 0; + length_deletions2 = 0; + lastEquality.clear(); + changes = true; + } + } + pointer++; + } + + // Normalize the diff. + if (changes) { + diff_cleanupMerge(diffs); + } + diff_cleanupSemanticLossless(diffs); + + // Find any overlaps between deletions and insertions. + // e.g: abcxxxxxxdef + // -> abcxxxdef + // e.g: xxxabcdefxxx + // -> defxxxabc + // Only extract an overlap if it is as big as the edit ahead or behind it. 
+ pointer = 1; + while (pointer < diffs.size()) { + if (diffs[pointer - 1].operation == DELETE && + diffs[pointer].operation == INSERT) { + auto deletion = diffs[pointer - 1].text; + auto insertion = diffs[pointer].text; + std::size_t overlap_length1 = diff_commonOverlap(deletion, insertion); + std::size_t overlap_length2 = diff_commonOverlap(insertion, deletion); + if (overlap_length1 >= overlap_length2) { + if (overlap_length1 >= deletion.length() / 2.0 || + overlap_length1 >= insertion.length() / 2.0) { + // Overlap found. + // Insert an equality and trim the surrounding edits. + diffs.emplace(diffs.begin() + pointer, EQUAL, + insertion.substr(0, overlap_length1)); + diffs[pointer - 1].text = + deletion.substr(0, deletion.length() - overlap_length1); + diffs[pointer + 1].text = insertion.substr(overlap_length1); + pointer++; + } + } else { + if (overlap_length2 >= deletion.length() / 2.0 || + overlap_length2 >= insertion.length() / 2.0) { + // Reverse overlap found. + // Insert an equality and swap and trim the surrounding edits. + diffs.emplace(diffs.begin() + pointer, EQUAL, + deletion.substr(0, overlap_length2)); + diffs[pointer - 1].operation = INSERT; + diffs[pointer - 1].text = + insertion.substr(0, insertion.length() - overlap_length2); + diffs[pointer + 1].operation = DELETE; + diffs[pointer + 1].text = deletion.substr(overlap_length2); + pointer++; + } + } + pointer++; + } + pointer++; + } +} + +void diff_match_patch::diff_cleanupSemanticLossless(TDiffVector &diffs) { + int pointer = 1; + // Intentionally ignore the first and last element (don't need checking). + while ((pointer != -1) && !diffs.empty() && (pointer < (diffs.size() - 1))) { + if (diffs[pointer - 1].operation == EQUAL && + diffs[pointer + 1].operation == EQUAL) { + // This is a single edit surrounded by equalities. + auto equality1 = diffs[pointer - 1].text; + auto edit = diffs[pointer].text; + auto equality2 = diffs[pointer + 1].text; + + // First, shift the edit as far left as possible. 
+      auto commonOffset = diff_commonSuffix(equality1, edit);
+      if (commonOffset > 0) {
+        // Move the shared tail of equality1/edit to the front of the edit and
+        // of the following equality.
+        auto commonString = safeMid(edit, edit.length() - commonOffset);
+        equality1 = equality1.substr(0, equality1.length() - commonOffset);
+        edit = commonString + edit.substr(0, edit.length() - commonOffset);
+        equality2 = commonString + equality2;
+      }
+
+      // Second, step character by character right,
+      // looking for the best fit.
+      auto bestEquality1 = equality1;
+      auto bestEdit = edit;
+      auto bestEquality2 = equality2;
+      auto bestScore = diff_cleanupSemanticScore(equality1, edit) +
+                       diff_cleanupSemanticScore(edit, equality2);
+      while (!edit.empty() && !equality2.empty() && edit[0] == equality2[0]) {
+        // Slide the edit one character to the right.
+        equality1 += edit[0];
+        edit = edit.substr(1) + equality2[0];
+        equality2 = equality2.substr(1);
+        auto score = diff_cleanupSemanticScore(equality1, edit) +
+                     diff_cleanupSemanticScore(edit, equality2);
+        // The >= encourages trailing rather than leading whitespace on
+        // edits.
+        if (score >= bestScore) {
+          bestScore = score;
+          bestEquality1 = equality1;
+          bestEdit = edit;
+          bestEquality2 = equality2;
+        }
+      }
+
+      if (diffs[pointer - 1].text != bestEquality1) {
+        // We have an improvement, save it back to the diff.
+        if (!bestEquality1.empty()) {
+          diffs[pointer - 1].text = bestEquality1;
+        } else {
+          // The leading equality was consumed entirely: drop it and step
+          // back so pointer still addresses the edit.
+          diffs.erase(diffs.begin() + pointer - 1);
+          pointer--;
+        }
+        diffs[pointer].text = bestEdit;
+        if (!bestEquality2.empty()) {
+          diffs[pointer + 1].text = bestEquality2;
+        } else {
+          // The trailing equality was consumed entirely: drop it.
+          diffs.erase(diffs.begin() + pointer + 1);
+          pointer--;
+        }
+      }
+    }
+    pointer++;
+  }
+}
+
+/**
+ * Given two strings, compute a score representing whether the internal
+ * boundary falls on logical boundaries.
+ * @param one First string; its last character is inspected
+ * @param two Second string; its first character is inspected
+ * @return The score, from 6 (an edge: either string empty) down to 0
+ */
+int64_t diff_match_patch::diff_cleanupSemanticScore(const std::wstring &one,
+                                                    const std::wstring &two) {
+  if (one.empty() || two.empty()) {
+    // Edges are the best.
+    return 6;
+  }
+
+  // Each port of this function behaves slightly differently due to
+  // subtle differences in each language's definition of things like
+  // 'whitespace'.  Since this function's purpose is largely cosmetic,
+  // the choice has been made to use each language's native features
+  // rather than force total conformity.
+  auto char1 = one[one.length() - 1];
+  auto char2 = two[0];
+  // Each flag implies the previous one: line break => whitespace =>
+  // non-alphanumeric.
+  bool nonAlphaNumeric1 = !std::iswalnum(char1);
+  bool nonAlphaNumeric2 = !std::iswalnum(char2);
+  bool whitespace1 = nonAlphaNumeric1 && std::iswspace(char1);
+  bool whitespace2 = nonAlphaNumeric2 && std::iswspace(char2);
+  bool lineBreak1 = whitespace1 && std::iswcntrl(char1);
+  bool lineBreak2 = whitespace2 && std::iswcntrl(char2);
+  bool blankLine1 = lineBreak1 && std::regex_search(one, BLANKLINEEND);
+  bool blankLine2 = lineBreak2 && std::regex_search(two, BLANKLINESTART);
+
+  if (blankLine1 || blankLine2) {
+    // Five points for blank lines.
+    return 5;
+  } else if (lineBreak1 || lineBreak2) {
+    // Four points for line breaks.
+    return 4;
+  } else if (nonAlphaNumeric1 && !whitespace1 && whitespace2) {
+    // Three points for end of sentences.
+    return 3;
+  } else if (whitespace1 || whitespace2) {
+    // Two points for whitespace.
+    return 2;
+  } else if (nonAlphaNumeric1 || nonAlphaNumeric2) {
+    // One point for non-alphanumeric.
+    return 1;
+  }
+  return 0;
+}
+
+// Narrow-string overload: convert with NUtils::to_wstring and forward.
+int64_t diff_match_patch::diff_cleanupSemanticScore(const std::string &one,
+                                                    const std::string &two) {
+  return diff_cleanupSemanticScore(NUtils::to_wstring(one),
+                                   NUtils::to_wstring(two));
+}
+
+// Define some regex patterns for matching boundaries.
+// BLANKLINEEND matches text ending in a blank line; BLANKLINESTART matches
+// text starting with one (an optional \r is allowed before a \n).
+std::wregex diff_match_patch::BLANKLINEEND = std::wregex(LR"(\n\r?\n$)");
+std::wregex diff_match_patch::BLANKLINESTART = std::wregex(LR"(^\r?\n\r?\n)");
+
+void diff_match_patch::diff_cleanupEfficiency(TDiffVector &diffs) {
+  bool changes = false;
+  // Stack of indices where equalities are found.
+  // NOTE(review): the template argument of std::stack is missing in this copy.
+  std::stack equalities;
+  // Always equal to equalities[equalitiesLength-1][1]
+  std::wstring lastEquality;
+  std::size_t pointer = 0;  // Index of current position.
+  // Is there an insertion operation before the last equality.
+ bool pre_ins = false; + // Is there a deletion operation before the last equality. + bool pre_del = false; + // Is there an insertion operation after the last equality. + bool post_ins = false; + // Is there a deletion operation after the last equality. + bool post_del = false; + while (pointer < diffs.size()) { + if (diffs[pointer].operation == EQUAL) { // Equality found. + if (diffs[pointer].text.length() < Diff_EditCost && + (post_ins || post_del)) { + // Candidate found. + equalities.push(pointer); + pre_ins = post_ins; + pre_del = post_del; + lastEquality = diffs[pointer].text; + } else { + // Not a candidate, and can never become one. + equalities = {}; + lastEquality.clear(); + } + post_ins = post_del = false; + } else { // An insertion or deletion. + if (diffs[pointer].operation == DELETE) { + post_del = true; + } else { + post_ins = true; + } + /* + * Five types to be split: + * ABXYCD + * AXCD + * ABXC + * AXCD + * ABXC + */ + if ((lastEquality.length() != 0) && + ((pre_ins && pre_del && post_ins && post_del) || + ((lastEquality.length() < Diff_EditCost / 2) && + ((pre_ins ? 1 : 0) + (pre_del ? 1 : 0) + (post_ins ? 1 : 0) + + (post_del ? 1 : 0)) == 3))) { + // Duplicate record. + diffs.emplace(diffs.begin() + equalities.top(), DELETE, lastEquality); + // Change second copy to insert. + diffs[equalities.top() + 1].operation = INSERT; + equalities.pop(); // Throw away the equality we just deleted. + lastEquality.clear(); + if (pre_ins && pre_del) { + // No changes made which could affect previous entry, keep going. + post_ins = post_del = true; + equalities = {}; + } else { + if (!equalities.empty()) { + equalities.pop(); + } + + pointer = !equalities.empty() ? 
equalities.top() : -1; + post_ins = post_del = false; + } + changes = true; + } + } + pointer++; + } + + if (changes) { + diff_cleanupMerge(diffs); + } +} + +void diff_match_patch::diff_cleanupMerge(TDiffVector &diffs) { + diffs.emplace_back(EQUAL, L""); + int pointer = 0; + int count_delete = 0; + int count_insert = 0; + std::wstring text_delete; + std::wstring text_insert; + + while (pointer < diffs.size()) { + switch (diffs[pointer].operation) { + case INSERT: + count_insert++; + text_insert += diffs[pointer].text; + pointer++; + break; + case DELETE: + count_delete++; + text_delete += diffs[pointer].text; + pointer++; + break; + case EQUAL: + // Upon reaching an equality, check for prior redundancies. + if (count_delete + count_insert > 1) { + if (count_delete != 0 && count_insert != 0) { + // Factor out any common prefixies. + auto commonlength = diff_commonPrefix(text_insert, text_delete); + if (commonlength != 0) { + if ((pointer > (count_delete + count_insert)) && + diffs[pointer - (count_delete + count_insert) - 1] + .operation == EQUAL) { + diffs[pointer - count_delete - count_insert - 1].text += + text_insert.substr(0, commonlength); + } else { + diffs.emplace(diffs.begin(), EQUAL, + text_insert.substr(0, commonlength)); + pointer++; + } + text_insert = text_insert.substr(commonlength); + text_delete = text_delete.substr(commonlength); + } + // Factor out any common suffixies. + commonlength = diff_commonSuffix(text_insert, text_delete); + if (commonlength != 0) { + diffs[pointer].text = + safeMid(text_insert, text_insert.length() - commonlength) + + diffs[pointer].text; + text_insert = + text_insert.substr(0, text_insert.length() - commonlength); + text_delete = + text_delete.substr(0, text_delete.length() - commonlength); + } + } + // Delete the offending records and add the merged ones. 
+ pointer -= count_delete + count_insert; + NUtils::Splice(diffs, pointer, count_delete + count_insert); + if (!text_delete.empty()) { + NUtils::Splice(diffs, pointer, 0, {Diff(DELETE, text_delete)}); + pointer++; + } + if (!text_insert.empty()) { + NUtils::Splice(diffs, pointer, 0, {Diff(INSERT, text_insert)}); + pointer++; + } + pointer++; + } else if (pointer != 0 && diffs[pointer - 1].operation == EQUAL) { + // Merge this equality with the previous one. + diffs[pointer - 1].text += diffs[pointer].text; + diffs.erase(diffs.begin() + pointer); + } else { + pointer++; + } + count_insert = 0; + count_delete = 0; + text_delete.clear(); + text_insert.clear(); + break; + } + } + if (diffs.back().text.empty()) { + diffs.pop_back(); // Remove the dummy entry at the end. + } + + // Second pass: look for single edits surrounded on both sides by + // equalities which can be shifted sideways to eliminate an equality. + // e.g: ABAC -> ABAC + bool changes = false; + pointer = 1; + // Intentionally ignore the first and last element (don't need checking). + while (!diffs.empty() && pointer < (diffs.size() - 1)) { + if (diffs[pointer - 1].operation == EQUAL && + diffs[pointer + 1].operation == EQUAL) { + // This is a single edit surrounded by equalities. + if (NUtils::endsWith(diffs[pointer].text, diffs[pointer - 1].text)) { + // Shift the edit over the previous equality. + diffs[pointer].text = + diffs[pointer - 1].text + + diffs[pointer].text.substr(0, diffs[pointer].text.length() - + diffs[pointer - 1].text.length()); + diffs[pointer + 1].text = + diffs[pointer - 1].text + diffs[pointer + 1].text; + NUtils::Splice(diffs, pointer - 1, 1); + changes = true; + } else if (diffs[pointer].text.find(diffs[pointer + 1].text) == 0) { + // Shift the edit over the next equality. 
+ diffs[pointer - 1].text += diffs[pointer + 1].text; + diffs[pointer].text = + diffs[pointer].text.substr(diffs[pointer + 1].text.length()) + + diffs[pointer + 1].text; + NUtils::Splice(diffs, pointer + 1, 1); + changes = true; + } + } + pointer++; + } + // If shifts were made, the diff needs reordering and another shift sweep. + if (changes) { + diff_cleanupMerge(diffs); + } +} +std::size_t diff_match_patch::diff_xIndex(const TDiffVector &diffs, + std::size_t loc) { + std::size_t chars1 = 0; + std::size_t chars2 = 0; + std::size_t last_chars1 = 0; + std::size_t last_chars2 = 0; + Diff lastDiff; + for (auto &&aDiff : diffs) { + if (aDiff.operation != INSERT) { + // Equality or deletion. + chars1 += aDiff.text.length(); + } + if (aDiff.operation != DELETE) { + // Equality or insertion. + chars2 += aDiff.text.length(); + } + if (chars1 > loc) { + // Overshot the location. + lastDiff = aDiff; + break; + } + last_chars1 = chars1; + last_chars2 = chars2; + } + if (lastDiff.operation == DELETE) { + // The location was deleted. + return last_chars2; + } + // Add the remaining character length. + return last_chars2 + (loc - last_chars1); +} + +std::wstring diff_match_patch::diff_prettyHtml(const TDiffVector &diffs) { + std::wstring html; + std::wstring text; + for (auto &&aDiff : diffs) { + text = aDiff.text; + NUtils::replace(text, L"&", L"&"); + NUtils::replace(text, L"<", L"<"); + NUtils::replace(text, L">", L">"); + NUtils::replace(text, L"\n", L"¶
"); + switch (aDiff.operation) { + case INSERT: + html += std::wstring(L"") + text + + std::wstring(L""); + break; + case DELETE: + html += std::wstring(L"") + text + + std::wstring(L""); + break; + case EQUAL: + html += std::wstring(L"") + text + std::wstring(L""); + break; + } + } + return html; +} + +std::wstring diff_match_patch::diff_prettyConsole(const TDiffVector &diffs) { + static std::wstring kRed{L"\033[0;31m"}; + static std::wstring kGreen{L"\033[0;32m"}; + static std::wstring kYellow{L"\033[0;33m"}; + static std::wstring kReset{L"\033[m"}; + static std::wstring kEOL{NUtils::fromPercentEncoding(L"%C2%B6") + L"\n"}; + + std::wstring retVal; + std::wstring text; + for (auto &&aDiff : diffs) { + text = aDiff.text; + NUtils::replace(text, L"\n", kEOL); + switch (aDiff.operation) { + case INSERT: + retVal += kGreen + text + kReset; + break; + case DELETE: + retVal += kRed + text + kReset; + break; + case EQUAL: + retVal += text; + break; + } + } + return retVal; +} + +std::wstring diff_match_patch::diff_text1(const TDiffVector &diffs) { + std::wstring text; + for (auto &&aDiff : diffs) { + if (aDiff.operation != INSERT) { + text += aDiff.text; + } + } + return text; +} + +std::wstring diff_match_patch::diff_text2(const TDiffVector &diffs) { + std::wstring text; + for (auto &&aDiff : diffs) { + if (aDiff.operation != DELETE) { + text += aDiff.text; + } + } + return text; +} + +std::size_t diff_match_patch::diff_levenshtein(const TDiffVector &diffs) { + std::size_t levenshtein = 0; + std::size_t insertions = 0; + std::size_t deletions = 0; + for (auto &&aDiff : diffs) { + switch (aDiff.operation) { + case INSERT: + insertions += aDiff.text.length(); + break; + case DELETE: + deletions += aDiff.text.length(); + break; + case EQUAL: + // A deletion and an insertion is one substitution. 
+ levenshtein += std::max(insertions, deletions); + insertions = 0; + deletions = 0; + break; + } + } + levenshtein += std::max(insertions, deletions); + return levenshtein; +} + +std::wstring diff_match_patch::diff_toDelta(const TDiffVector &diffs) { + std::wstring text; + for (auto &&aDiff : diffs) { + switch (aDiff.operation) { + case INSERT: + text += L"+" + + NUtils::toPercentEncoding(aDiff.text, L" !~*'();/?:@&=+$,#") + + L"\t"; + break; + case DELETE: + text += L"-" + std::to_wstring(aDiff.text.length()) + L"\t"; + break; + case EQUAL: + text += L"=" + std::to_wstring(aDiff.text.length()) + L"\t"; + break; + } + } + if (!text.empty()) { + // Strip off trailing tab character. + text = text.substr(0, text.length() - 1); + } + return text; +} + +TDiffVector diff_match_patch::diff_fromDelta(const std::wstring &text1, + const std::wstring &delta) { + TDiffVector diffs; + std::size_t pointer = 0; // Cursor in text1 + auto tokens = NUtils::splitString(delta, L"\t", false); + for (auto &&token : tokens) { + if (token.empty()) { + // Blank tokens are ok (from a trailing \t). + continue; + } + // Each token begins with a one character parameter which specifies the + // operation of this token (delete, insert, equality). + std::wstring param = safeMid(token, 1); + switch (token[0]) { + case '+': + NUtils::replace(param, L"+", L"%2b"); + param = NUtils::fromPercentEncoding(param); + diffs.emplace_back(INSERT, param); + break; + case '-': + // Fall through. 
+ case '=': { + auto n = NUtils::toInt(param); + if (n < 0) { + throw std::wstring(L"Negative number in diff_fromDelta: " + param); + } + std::wstring text; + if ((pointer + n) > text1.length()) { + throw std::wstring(L"Delta length (" + std::to_wstring(pointer + n) + + L") larger than source text length (" + + std::to_wstring(text1.length()) + L")."); + } + + text = safeMid(text1, pointer, n); + pointer += n; + if (token[0] == L'=') { + diffs.emplace_back(EQUAL, text); + } else { + diffs.emplace_back(DELETE, text); + } + break; + } + default: + throw std::wstring(L"Invalid diff operation in diff_fromDelta: " + + token[0]); + } + } + if (pointer != text1.length()) { + throw std::wstring(L"Delta length (") + std::to_wstring(pointer) + + L") smaller than source text length (" + + std::to_wstring(text1.length()) + L")"; + } + return diffs; +} + +TDiffVector diff_match_patch::diff_fromDelta(const std::string &text1, + const std::string &delta) { + return diff_fromDelta(NUtils::to_wstring(text1), NUtils::to_wstring(delta)); +} + +// MATCH FUNCTIONS + +std::size_t diff_match_patch::match_main(const std::wstring &text, + const std::wstring &pattern, + std::size_t loc) { + // Check for null inputs not needed since null can't be passed via + // std::wstring + + loc = std::max(kZERO, std::min(loc, text.length())); + if (text == pattern) { + // Shortcut (potentially not guaranteed by the algorithm) + return 0; + } else if (text.empty()) { + // Nothing to match. + return -1; + } else if (loc + pattern.length() <= text.length() && + safeMid(text, loc, pattern.length()) == pattern) { + // Perfect match at the perfect spot! (Includes case of nullptr pattern) + return loc; + } else { + // Do a fuzzy compare. 
+ return match_bitap(text, pattern, loc); + } +} + +std::size_t diff_match_patch::match_main(const std::string &text, + const std::string &pattern, + std::size_t loc) { + return match_main(NUtils::to_wstring(text), NUtils::to_wstring(pattern), loc); +} + +std::size_t diff_match_patch::match_bitap(const std::wstring &text, + const std::wstring &pattern, + std::size_t loc) { + if (!(Match_MaxBits == 0 || pattern.length() <= Match_MaxBits)) { + throw "Pattern too long for this application."; + } + + // Initialise the alphabet. + auto &&s = match_alphabet(pattern); + + // Highest score beyond which we give up. + double score_threshold = Match_Threshold; + // Is there a nearby exact match? (speedup) + auto best_loc = text.find(pattern, loc); + if (best_loc != std::string::npos) { + score_threshold = + std::min(match_bitapScore(0, best_loc, loc, pattern), score_threshold); + // What about in the other direction? (speedup) + auto start = std::min(loc + pattern.length(), text.length()); + best_loc = text.rfind(pattern, start); + if (best_loc != std::string::npos) { + score_threshold = std::min(match_bitapScore(0, best_loc, loc, pattern), + score_threshold); + } + } + + // Initialise the bit arrays. + auto matchmask = 1 << (pattern.length() - 1); + best_loc = std::string::npos; + + std::size_t bin_min, bin_mid; + auto bin_max = pattern.length() + text.length(); + std::vector rd; + std::vector last_rd; + for (int d = 0; d < pattern.length(); d++) { + // Scan for the best match; each iteration allows for one more error. + // Run a binary search to determine how far from 'loc' we can stray at + // this error level. + bin_min = 0; + bin_mid = bin_max; + while (bin_min < bin_mid) { + if (match_bitapScore(d, loc + bin_mid, loc, pattern) <= score_threshold) { + bin_min = bin_mid; + } else { + bin_max = bin_mid; + } + bin_mid = (bin_max - bin_min) / 2 + bin_min; + } + // Use the result from this iteration as the maximum for the next. 
+ bin_max = bin_mid; + auto start = std::max(kONE, (loc > bin_mid) ? (loc - bin_mid + 1) : kZERO); + auto finish = std::min(loc + bin_mid, text.length()) + pattern.length(); + + rd = std::vector(finish + 2, 0); + rd[finish + 1] = (1 << d) - 1; + for (auto j = finish; (j != -1) && (j >= start); j--) { + int64_t charMatch; + if (text.length() <= j - 1) { + // Out of range. + charMatch = 0; + } else { + auto pos = s.find(text[j - 1]); + if (pos == s.end()) + charMatch = 0; + else + charMatch = (*pos).second; + } + if (d == 0) { + // First pass: exact match. + rd[j] = ((rd[j + 1] << 1) | 1) & charMatch; + } else { + // Subsequent passes: fuzzy match. + rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | + (((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]; + } + if ((rd[j] & matchmask) != 0) { + double score = match_bitapScore(d, j - 1, loc, pattern); + // This match will almost certainly be better than any existing + // match. But check anyway. + if (score <= score_threshold) { + // Told you so. + score_threshold = score; + best_loc = j - 1; + if (best_loc > loc) { + // When passing loc, don't exceed our current distance from loc. + start = + std::max(kONE, (2 * loc > best_loc) ? 2 * loc - best_loc : 1); + } else { + // Already passed loc, downhill from here on in. + break; + } + } + } + } + if (match_bitapScore(d + 1, loc, loc, pattern) > score_threshold) { + // No hope for a (better) match at greater error levels. + break; + } + last_rd = std::move(rd); + } + return best_loc; +} + +std::size_t diff_match_patch::match_bitap(const std::string &text, + const std::string &pattern, + std::size_t loc) { + return match_bitap(NUtils::to_wstring(text), NUtils::to_wstring(pattern), + loc); +} + +double diff_match_patch::match_bitapScore(int64_t e, int64_t x, int64_t loc, + const std::wstring &pattern) { + const float accuracy = static_cast(e) / pattern.length(); + const auto proximity = std::abs(loc - x); + if (Match_Distance == 0) { + // Dodge divide by zero error. 
+ return proximity == 0 ? accuracy : 1.0; + } + return accuracy + (proximity / static_cast(Match_Distance)); +} + +diff_match_patch::TCharPosMap diff_match_patch::match_alphabet( + const std::wstring &pattern) { + TCharPosMap s; + std::size_t i; + for (i = 0; i < pattern.length(); i++) { + auto c = pattern[i]; + s[c] = 0; + } + for (i = 0; i < pattern.length(); i++) { + auto c = pattern[i]; + auto pos = s.find(c); + std::size_t prev = 0; + if (pos != s.end()) prev = (*pos).second; + s[c] = prev | (1ULL << (pattern.length() - i - 1)); + } + return s; +} + +diff_match_patch::TCharPosMap diff_match_patch::match_alphabet( + const std::string &pattern) { + return match_alphabet(NUtils::to_wstring(pattern)); +} + +// PATCH FUNCTIONS + +void diff_match_patch::patch_addContext(Patch &patch, + const std::wstring &text) { + if (text.empty()) { + return; + } + std::wstring pattern = safeMid(text, patch.start2, patch.length1); + std::size_t padding = 0; + + // Look for the first and last matches of pattern in text. If two different + // matches are found, increase the pattern length. + while ((text.find(pattern) != text.rfind(pattern)) && + (pattern.length() < (Match_MaxBits - Patch_Margin - Patch_Margin))) { + padding += Patch_Margin; + pattern = safeMid( + text, + std::max(kZERO, + ((patch.start2 > padding) ? patch.start2 - padding : 0UL)), + std::min(text.length(), patch.start2 + patch.length1 + padding) - + std::max(kZERO, + (patch.start2 > padding) ? patch.start2 - padding : 0)); + } + // Add one chunk for good luck. + padding += Patch_Margin; + + // Add the prefix. + std::wstring prefix = safeMid( + text, + std::max(kZERO, + ((patch.start2 > padding) ? patch.start2 - padding : 0UL)), + patch.start2 - + std::max(kZERO, + ((patch.start2 > padding) ? patch.start2 - padding : 0UL))); + if (!prefix.empty()) { + patch.diffs.emplace(patch.diffs.begin(), EQUAL, prefix); + } + // Add the suffix. 
+ std::wstring suffix = + safeMid(text, patch.start2 + patch.length1, + std::min(text.length(), patch.start2 + patch.length1 + padding) - + (patch.start2 + patch.length1)); + if (!suffix.empty()) { + patch.diffs.emplace_back(EQUAL, suffix); + } + + // Roll back the start points. + patch.start1 -= prefix.length(); + patch.start2 -= prefix.length(); + // Extend the lengths. + patch.length1 += prefix.length() + suffix.length(); + patch.length2 += prefix.length() + suffix.length(); +} + +void diff_match_patch::patch_addContext(Patch &patch, const std::string &text) { + return patch_addContext(patch, NUtils::to_wstring(text)); +} + +TPatchVector diff_match_patch::patch_make(const std::wstring &text1, + const std::wstring &text2) { + // Check for null inputs not needed since null can't be passed via + // std::wstring + + // No diffs provided, compute our own. + TDiffVector diffs = diff_main(text1, text2, true); + if (diffs.size() > 2) { + diff_cleanupSemantic(diffs); + diff_cleanupEfficiency(diffs); + } + + return patch_make(text1, diffs); +} + +TPatchVector diff_match_patch::patch_make(const TDiffVector &diffs) { + // No origin string provided, compute our own. + const std::wstring text1 = diff_text1(diffs); + return patch_make(text1, diffs); +} + +TPatchVector diff_match_patch::patch_make(const std::wstring &text1, + const std::wstring & /*text2*/, + const TDiffVector &diffs) { + // text2 is entirely unused. + return patch_make(text1, diffs); +} + +TPatchVector diff_match_patch::patch_make(const std::wstring &text1, + const TDiffVector &diffs) { + // Check for null inputs not needed since null can't be passed via + // std::wstring + + TPatchVector patches; + if (diffs.empty()) { + return patches; // Get rid of the nullptr case. + } + Patch patch; + std::size_t char_count1 = 0; // Number of characters into the text1 string. + std::size_t char_count2 = 0; // Number of characters into the text2 string. 
+ // Start with text1 (prepatch_text) and apply the diffs until we arrive at + // text2 (postpatch_text). We recreate the patches one by one to determine + // context info. + std::wstring prepatch_text = text1; + std::wstring postpatch_text = text1; + for (auto &&aDiff : diffs) { + if (patch.diffs.empty() && aDiff.operation != EQUAL) { + // A new patch starts here. + patch.start1 = char_count1; + patch.start2 = char_count2; + } + + switch (aDiff.operation) { + case INSERT: + patch.diffs.emplace_back(aDiff); + patch.length2 += aDiff.text.length(); + postpatch_text = postpatch_text.substr(0, char_count2) + aDiff.text + + safeMid(postpatch_text, char_count2); + break; + case DELETE: + patch.length1 += aDiff.text.length(); + patch.diffs.emplace_back(aDiff); + postpatch_text = + postpatch_text.substr(0, char_count2) + + safeMid(postpatch_text, char_count2 + aDiff.text.length()); + break; + case EQUAL: + if (aDiff.text.length() <= 2 * Patch_Margin && !patch.diffs.empty() && + !(aDiff == diffs.back())) { + // Small equality inside a patch. + patch.diffs.emplace_back(aDiff); + patch.length1 += aDiff.text.length(); + patch.length2 += aDiff.text.length(); + } + + if (aDiff.text.length() >= 2 * Patch_Margin) { + // Time for a new patch. + if (!patch.diffs.empty()) { + patch_addContext(patch, prepatch_text); + patches.emplace_back(patch); + patch = Patch(); + // Unlike Unidiff, our patch lists have a rolling context. + // http://code.google.com/p/google-diff-match-patch/wiki/Unidiff + // Update prepatch text & pos to reflect the application of the + // just completed patch. + prepatch_text = postpatch_text; + char_count1 = char_count2; + } + } + break; + } + + // Update the current character count. + if (aDiff.operation != INSERT) { + char_count1 += aDiff.text.length(); + } + if (aDiff.operation != DELETE) { + char_count2 += aDiff.text.length(); + } + } + // Pick up the leftover patch if not empty. 
+ if (!patch.diffs.empty()) { + patch_addContext(patch, prepatch_text); + patches.emplace_back(patch); + } + + return patches; +} + +TPatchVector diff_match_patch::patch_make(const std::string &text1, + const TDiffVector &diffs) { + return patch_make(NUtils::to_wstring(text1), diffs); +} + +TPatchVector diff_match_patch::patch_make(const std::string &text1, + const std::string &text2, + const TDiffVector &diffs) { + return patch_make(NUtils::to_wstring(text1), NUtils::to_wstring(text2), + diffs); +} + +TPatchVector diff_match_patch::patch_make(const std::string &text1, + const std::string &text2) { + return patch_make(NUtils::to_wstring(text1), NUtils::to_wstring(text2)); +} + +TPatchVector diff_match_patch::patch_deepCopy(const TPatchVector &patches) { + TPatchVector patchesCopy; + for (auto &&aPatch : patches) { + Patch patchCopy = Patch(); + for (auto &&aDiff : aPatch.diffs) { + patchCopy.diffs.emplace_back(aDiff.operation, aDiff.text); + } + patchCopy.start1 = aPatch.start1; + patchCopy.start2 = aPatch.start2; + patchCopy.length1 = aPatch.length1; + patchCopy.length2 = aPatch.length2; + patchesCopy.emplace_back(patchCopy); + } + return patchesCopy; +} + +std::pair > diff_match_patch::patch_apply( + TPatchVector patches, std::wstring text) { + if (patches.empty()) { + return {text, std::vector(0)}; + } + + // Deep copy the patches so that no changes are made to originals. + patches = patch_deepCopy(patches); + + std::wstring nullPadding = patch_addPadding(patches); + text = nullPadding + text + nullPadding; + patch_splitMax(patches); + + std::size_t x = 0; + // delta keeps track of the offset between the expected and actual location + // of the previous patch. If there are patches expected at positions 10 and + // 20, but the first patch was found at 12, delta is 2 and the second patch + // has an effective expected position of 22. 
+ uint64_t delta = 0; + std::vector results(patches.size()); + for (auto &&aPatch : patches) { + auto expected_loc = aPatch.start2 + delta; + std::wstring text1 = diff_text1(aPatch.diffs); + std::size_t start_loc; + std::size_t end_loc = std::string::npos; + if (text1.length() > Match_MaxBits) { + // patch_splitMax will only provide an oversized pattern in the case of + // a monster delete. + start_loc = + match_main(text, text1.substr(0, Match_MaxBits), expected_loc); + if (start_loc != -1) { + end_loc = match_main(text, text1.substr(text1.length() - Match_MaxBits), + expected_loc + text1.length() - Match_MaxBits); + if (end_loc == -1 || start_loc >= end_loc) { + // Can't find valid trailing context. Drop this patch. + start_loc = -1; + } + } + } else { + start_loc = match_main(text, text1, expected_loc); + } + if (start_loc == -1) { + // No match found. :( + results[x] = false; + // Subtract the delta for this failed patch from subsequent patches. + delta -= aPatch.length2 - aPatch.length1; + } else { + // Found a match. :) + results[x] = true; + delta = start_loc - expected_loc; + std::wstring text2; + if (end_loc == -1) { + text2 = safeMid(text, start_loc, text1.length()); + } else { + text2 = safeMid(text, start_loc, end_loc + Match_MaxBits - start_loc); + } + if (text1 == text2) { + // Perfect match, just shove the replacement text in. + text = text.substr(0, start_loc) + diff_text2(aPatch.diffs) + + safeMid(text, start_loc + text1.length()); + } else { + // Imperfect match. Run a diff to get a framework of equivalent + // indices. + TDiffVector diffs = diff_main(text1, text2, false); + if (text1.length() > Match_MaxBits && + diff_levenshtein(diffs) / static_cast(text1.length()) > + Patch_DeleteThreshold) { + // The end points match, but the content is unacceptably bad. 
+ results[x] = false; + } else { + diff_cleanupSemanticLossless(diffs); + std::size_t index1 = 0; + for (auto &&aDiff : aPatch.diffs) { + if (aDiff.operation != EQUAL) { + auto index2 = diff_xIndex(diffs, index1); + if (aDiff.operation == INSERT) { + // Insertion + text = text.substr(0, start_loc + index2) + aDiff.text + + safeMid(text, start_loc + index2); + } else if (aDiff.operation == DELETE) { + // Deletion + text = + text.substr(0, start_loc + index2) + + safeMid(text, start_loc + + diff_xIndex( + diffs, index1 + aDiff.text.length())); + } + } + if (aDiff.operation != DELETE) { + index1 += aDiff.text.length(); + } + } + } + } + } + x++; + } + // Strip the padding off. + text = safeMid(text, nullPadding.length(), + text.length() - 2 * nullPadding.length()); + return {text, results}; +} + +std::pair > diff_match_patch::patch_apply( + TPatchVector patches, std::string text) { + return patch_apply(patches, NUtils::to_wstring(text)); +} + +std::wstring diff_match_patch::patch_addPadding(TPatchVector &patches) { + auto paddingLength = Patch_Margin; + std::wstring nullPadding; + for (char x = 1; x <= paddingLength; x++) { + nullPadding += NUtils::to_wstring(x); + } + + // Bump all the patches forward. + for (auto &&aPatch : patches) { + aPatch.start1 += paddingLength; + aPatch.start2 += paddingLength; + } + + // Add some padding on start of first diff. + // auto && patch = patches.front(); + // TDiffVector & diffs = patch.diffs; + if (patches.front().diffs.empty() || + patches.front().diffs.front().operation != EQUAL) { + // Add nullPadding equality. + patches.front().diffs.emplace(patches.front().diffs.begin(), EQUAL, + nullPadding); + patches.front().start1 -= paddingLength; // Should be 0. + patches.front().start2 -= paddingLength; // Should be 0. + patches.front().length1 += paddingLength; + patches.front().length2 += paddingLength; + } else if (paddingLength > patches.front().diffs.front().text.length()) { + // Grow first equality. 
+ auto &&firstDiff = patches.front().diffs.front(); + auto extraLength = paddingLength - firstDiff.text.length(); + firstDiff.text = + nullPadding.substr(firstDiff.text.length()) + firstDiff.text; + patches.front().start1 -= extraLength; + patches.front().start2 -= extraLength; + patches.front().length1 += extraLength; + patches.front().length2 += extraLength; + } + + // Add some padding on end of last diff. + // patch = patches.back(); + // diffs = patch.diffs; + if ((patches.back().diffs.size() == 0) || + patches.back().diffs.back().operation != EQUAL) { + // Add nullPadding equality. + patches.back().diffs.emplace_back(EQUAL, nullPadding); + patches.back().length1 += paddingLength; + patches.back().length2 += paddingLength; + } else if (paddingLength > patches.back().diffs.back().text.length()) { + // Grow last equality. + // Diff &lastDiff = patches.back().diffs.back(); + auto extraLength = + paddingLength - patches.back().diffs.back().text.length(); + patches.back().diffs.back().text += nullPadding.substr(0, extraLength); + patches.back().length1 += extraLength; + patches.back().length2 += extraLength; + } + + return nullPadding; +} + +void diff_match_patch::patch_splitMax(TPatchVector &patches) { + auto patch_size = Match_MaxBits; + for (int x = 0; x < patches.size(); x++) { + if (patches[x].length1 <= patch_size) { + continue; + } + Patch bigpatch = patches[x]; + // Remove the big old patch. + NUtils::Splice(patches, x--, 1); + auto start1 = bigpatch.start1; + auto start2 = bigpatch.start2; + std::wstring precontext; + while (!bigpatch.diffs.empty()) { + // Create one of several smaller patches. 
+ Patch patch; + bool empty = true; + patch.start1 = start1 - precontext.length(); + patch.start2 = start2 - precontext.length(); + if (precontext.length() != 0) { + patch.length1 = patch.length2 = precontext.length(); + patch.diffs.emplace_back(EQUAL, precontext); + } + while (!bigpatch.diffs.empty() && + (patch.length1 < (patch_size - Patch_Margin))) { + auto diff_type = bigpatch.diffs[0].operation; + auto diff_text = bigpatch.diffs[0].text; + if (diff_type == INSERT) { + // Insertions are harmless. + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + patch.diffs.push_back(bigpatch.diffs.front()); + bigpatch.diffs.erase(bigpatch.diffs.begin()); + empty = false; + } else if ((diff_type == DELETE) && (patch.diffs.size() == 1) && + (patch.diffs.front().operation == EQUAL) && + (diff_text.length() > 2 * patch_size)) { + // This is a large deletion. Let it pass in one chunk. + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + empty = false; + patch.diffs.emplace_back(diff_type, diff_text); + bigpatch.diffs.erase(bigpatch.diffs.begin()); + } else { + // Deletion or equality. Only take as much as we can stomach. + diff_text = diff_text.substr( + 0, std::min(diff_text.length(), + (patch_size > (patch.length1 + Patch_Margin)) + ? (patch_size - patch.length1 - Patch_Margin) + : (-1 * 1UL))); + patch.length1 += diff_text.length(); + start1 += diff_text.length(); + if (diff_type == EQUAL) { + patch.length2 += diff_text.length(); + start2 += diff_text.length(); + } else { + empty = false; + } + patch.diffs.emplace_back(diff_type, diff_text); + if (diff_text == bigpatch.diffs[0].text) { + bigpatch.diffs.erase(bigpatch.diffs.begin()); + } else { + bigpatch.diffs[0].text = + bigpatch.diffs[0].text.substr(diff_text.length()); + } + } + } + // Compute the head context for the next patch. + precontext = diff_text2(patch.diffs); + precontext = precontext.substr( + std::max(kZERO, (precontext.length() > Patch_Margin) + ? 
(precontext.length() - Patch_Margin) + : 0)); + + std::wstring postcontext; + // Append the end context for this patch. + if (diff_text1(bigpatch.diffs).length() > Patch_Margin) { + postcontext = diff_text1(bigpatch.diffs).substr(0, Patch_Margin); + } else { + postcontext = diff_text1(bigpatch.diffs); + } + + if (postcontext.length() != 0) { + patch.length1 += postcontext.length(); + patch.length2 += postcontext.length(); + if ((patch.diffs.size() != 0) && + (patch.diffs[patch.diffs.size() - 1].operation == EQUAL)) { + patch.diffs[patch.diffs.size() - 1].text += postcontext; + } else { + patch.diffs.emplace_back(EQUAL, postcontext); + } + } + if (!empty) { + NUtils::Splice(patches, ++x, 0ULL, patch); + } + } + } +} + +std::wstring diff_match_patch::patch_toText(const TPatchVector &patches) { + std::wstring text; + for (auto &&aPatch : patches) { + text += aPatch.toString(); + } + return text; +} + +TPatchVector diff_match_patch::patch_fromText(const std::wstring &textline) { + TPatchVector patches; + if (textline.empty()) { + return patches; + } + auto text = NUtils::splitString(textline, L"\n", true); + int textPointer = 0; + std::wstring line; + while (textPointer < text.size()) { + patches.push_back(text[textPointer]); + auto &patch = patches.back(); + textPointer++; + + while (textPointer < text.size()) { + if (text[textPointer].empty()) { + ++textPointer; + continue; + } + + auto sign = text[textPointer][0]; + + line = text[textPointer].substr(1); + NUtils::replace(line, L"+", L"%2b"); + line = NUtils::fromPercentEncoding(line); + if (sign == '-') { + // Deletion. + patch.diffs.emplace_back(DELETE, line); + } else if (sign == '+') { + // Insertion. + patch.diffs.emplace_back(INSERT, line); + } else if (sign == ' ') { + // Minor equality. + patch.diffs.emplace_back(EQUAL, line); + } else if (sign == '@') { + // Start of next patch. + break; + } else { + // WTF? 
+ throw std::wstring(std::wstring(L"Invalid patch mode '") + sign + + std::wstring(L" in: ") + line); + return {}; + } + textPointer++; + } + } + return patches; +} + +TPatchVector diff_match_patch::patch_fromText(const std::string &textline) { + return patch_fromText(NUtils::to_wstring(textline)); +} + +std::wstring diff_match_patch::safeMid(const std::wstring &str, + std::size_t pos) { + return safeMid(str, pos, std::string::npos); +} + +std::wstring diff_match_patch::safeMid(const std::wstring &str, std::size_t pos, + std::size_t len) { + return (pos == str.length()) ? std::wstring() : str.substr(pos, len); +} + +std::wstring NUtils::to_wstring(const diff_match_patch::TVariant &variant, + bool doubleQuoteEmpty) { + std::wstring retVal; + if (std::holds_alternative(variant)) + retVal = std::get(variant); + + if (doubleQuoteEmpty && retVal.empty()) return LR"("")"; + + return retVal; +} + +std::wstring NUtils::to_wstring(const Patch &patch, bool doubleQuoteEmpty) { + auto retVal = patch.toString(); + if (doubleQuoteEmpty && retVal.empty()) return LR"("")"; + return retVal; +} + +std::wstring NUtils::to_wstring(const Diff &diff, bool doubleQuoteEmpty) { + auto retVal = diff.toString(); + if (doubleQuoteEmpty && retVal.empty()) return LR"("")"; + return retVal; +} diff --git a/cpp17/diff_match_patch.h b/cpp17/diff_match_patch.h new file mode 100644 index 0000000..b0e37bd --- /dev/null +++ b/cpp17/diff_match_patch.h @@ -0,0 +1,722 @@ +/* +/* + * Diff Match and Patch + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DIFF_MATCH_PATCH_H +#define DIFF_MATCH_PATCH_H + +#include +#include +#include +#include +#include +#include +#include +#ifdef USE_GTEST +#include "gtest/gtest.h" +#endif + +/* + * Functions for diff, match and patch. + * Computes the difference between two texts to create a patch. + * Applies the patch onto another text, allowing for errors. + * + * @author fraser@google.com (Neil Fraser) + * + * Qt/C++ port by mikeslemmer@gmail.com (Mike Slemmer): + * Qt->C++17 with native STL use only, port by scott@towel42.com (Scott Aron + Bloom) + * C++17 was intentionally chosen for variant support + * + * Code known to compile with C++17 VS2022 and g++ 9.5.0 + * + * Here is a trivial sample program which works properly when linked with this + * library: + * + + #include "diff_match_patch.h" + int main(int argc, char **argv) { + auto str1 = std::string("First string in diff"); + auto str2 = std::string("Second string in diff"); + + diff_match_patch dmp; + auto strPatch = dmp.patch_toText(dmp.patch_make(str1, str2)); + auto out = dmp.patch_apply(dmp.patch_fromText(strPatch), str1); + auto strResult = out.first; + + // here, strResult will equal str2 above. + return 0; + } + +*/ + +/**- + * The data structure representing a diff is a Linked list of Diff objects: + * {Diff(Operation.DELETE, "Hello"), Diff(Operation.INSERT, "Goodbye"), + * Diff(Operation.EQUAL, " world.")} + * which means: delete "Hello", add "Goodbye" and keep " world." + */ +enum Operation { DELETE, INSERT, EQUAL }; + +/** + * Class representing one diff operation. 
+ */ +class Diff { + public: + Operation operation{DELETE}; + // One of: INSERT, DELETE or EQUAL. + std::wstring text; + // The text associated with this diff operation. + + /** + * Constructor. Initializes the diff with the provided values. + * @param operation One of INSERT, DELETE or EQUAL. + * @param text The text being applied. + */ + Diff(Operation _operation, const std::wstring &_text); + Diff(Operation _operation, const wchar_t *_text); + Diff(Operation _operation, const std::string &_text); + Diff(Operation _operation, const char *_text); + Diff(); + inline bool isNull() const; + std::wstring toString() const; + bool operator==(const Diff &d) const; + bool operator!=(const Diff &d) const; + + static std::wstring strOperation(Operation op); +}; +using TDiffVector = std::vector; + +/** + * Class representing one patch operation. + */ +class Patch { + public: + TDiffVector diffs; + std::size_t length1{0}; + std::size_t length2{0}; + std::size_t start1{0}; + std::size_t start2{0}; + + /** + * Constructor. Initializes with an empty list of diffs. + */ + Patch(); + Patch(std::wstring &text); // modifies text, and removes the text used + bool isNull() const; + std::wstring toString() const; + + private: + std::wstring getPatchHeader() const; + std::wstring getCoordinateString(std::size_t start, std::size_t length) const; +}; +using TPatchVector = std::vector; + +/** + * Class containing the diff, match and patch methods. + * Also contains the behaviour settings. 
+ */ + +class diff_match_patch { + friend class diff_match_patch_test; +#ifdef USE_GTEST + FRIEND_TEST(diff_match_patch_test, testDiffCommonOverlap); + FRIEND_TEST(diff_match_patch_test, testDiffHalfmatch); + FRIEND_TEST(diff_match_patch_test, testDiffLinesToChars); + FRIEND_TEST(diff_match_patch_test, testDiffCharsToLines); + FRIEND_TEST(diff_match_patch_test, testDiffBisect); + FRIEND_TEST(diff_match_patch_test, testMatchAlphabet); + FRIEND_TEST(diff_match_patch_test, testMatchBitap); + FRIEND_TEST(diff_match_patch_test, testPatchAddContext); +#endif + public: + // Defaults. + // Set these on your diff_match_patch instance to override the defaults. + + // Number of seconds to map a diff before giving up (0 for infinity). + float Diff_Timeout{1.0f}; + // Cost of an empty edit operation in terms of edit characters. + short Diff_EditCost{4}; + // At what point is no match declared (0.0 = perfection, 1.0 = very loose). + float Match_Threshold{0.5f}; + // How far to search for a match (0 = exact location, 1000+ = broad match). + // A match this many characters away from the expected location will add + // 1.0 to the score (0.0 is a perfect match). + int64_t Match_Distance{1000}; + // When deleting a large block of text (over ~64 characters), how close does + // the contents have to match the expected contents. (0.0 = perfection, + // 1.0 = very loose). Note that Match_Threshold controls how closely the + // end points of a delete need to match. + float Patch_DeleteThreshold{0.5f}; + // Chunk size for context length. + short Patch_Margin{4}; + + short Match_MaxBits{32}; // unit tests are based on 32 bits + + private: + // Define some regex patterns for matching boundaries. 
+ static std::wregex BLANKLINEEND; + static std::wregex BLANKLINESTART; + + public: + using TStringVector = std::vector; + using TVariant = std::variant; + using TVariantVector = std::vector; + using TCharPosMap = std::map; + + diff_match_patch(); + + // DIFF FUNCTIONS + + /** + * Find the differences between two texts. + * Run a faster slightly less optimal diff. + * This method allows the 'checklines' of diff_main() to be optional. + * Most of the time checklines is wanted, so default to true. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @return Linked List of Diff objects. + */ + TDiffVector diff_main(const std::wstring &text1, const std::wstring &text2); + TDiffVector diff_main(const std::string &text1, const std::string &text2); + + /** + * Find the differences between two texts. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster slightly less optimal diff. + * @return Linked List of Diff objects. + */ + TDiffVector diff_main(const std::wstring &text1, const std::wstring &text2, + bool checklines); + TDiffVector diff_main(const std::string &text1, const std::string &text2, + bool checklines); + + /** + * Find the differences between two texts. Simplifies the problem by + * stripping any common prefix or suffix off the texts before diffing. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster slightly less optimal diff. + * @param deadline Time when the diff should be complete by. Used + * internally for recursive calls. Users should set DiffTimeout instead. + * @return Linked List of Diff objects. 
+ */ + private: + TDiffVector diff_main(const std::wstring &text1, const std::wstring &text2, + bool checklines, clock_t deadline); + TDiffVector diff_main(const std::string &text1, const std::string &text2, + bool checklines, clock_t deadline); + + /** + * Find the differences between two texts. Assumes that the texts do not + * have any common prefix or suffix. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param checklines Speedup flag. If false, then don't run a + * line-level diff first to identify the changed areas. + * If true, then run a faster slightly less optimal diff. + * @param deadline Time when the diff should be complete by. + * @return Linked List of Diff objects. + */ + private: + TDiffVector diff_compute(const std::wstring &text1, const std::wstring &text2, + bool checklines, clock_t deadline); + TDiffVector diff_compute(const std::string &text1, const std::string &text2, + bool checklines, clock_t deadline); + + /** + * Do a quick line-level diff on both strings, then rediff the parts for + * greater accuracy. + * This speedup can produce non-minimal diffs. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param deadline Time when the diff should be complete by. + * @return Linked List of Diff objects. + */ + private: + TDiffVector diff_lineMode(std::wstring text1, std::wstring text2, + clock_t deadline); + TDiffVector diff_lineMode(std::string text1, std::string text2, + clock_t deadline); + + /** + * Find the 'middle snake' of a diff, split the problem in two + * and return the recursively constructed diff. + * See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @return Linked List of Diff objects. 
+ */ + protected: + TDiffVector diff_bisect(const std::wstring &text1, const std::wstring &text2, + clock_t deadline); + TDiffVector diff_bisect(const std::string &text1, const std::string &text2, + clock_t deadline); + + /** + * Given the location of the 'middle snake', split the diff in two parts + * and recurse. + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param x Index of split point in text1. + * @param y Index of split point in text2. + * @param deadline Time at which to bail if not yet complete. + * @return LinkedList of Diff objects. + */ + private: + TDiffVector diff_bisectSplit(const std::wstring &text1, + const std::wstring &text2, std::size_t x, + std::size_t y, clock_t deadline); + TDiffVector diff_bisectSplit(const std::string &text1, + const std::string &text2, std::size_t x, + std::size_t y, clock_t deadline); + + /** + * Split two texts into a list of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * @param text1 First string. + * @param text2 Second string. + * @return Three element Object array, containing the encoded text1, the + * encoded text2 and the List of unique strings. The zeroth element + * of the List of unique strings is intentionally blank. + */ + protected: + std::vector diff_linesToChars( + const std::wstring &text1, + const std::wstring &text2); // return elems 0 and 1 are std::wstring, + // elem 2 is TStringVector + std::vector diff_linesToChars(const std::string &text1, + const std::string &text2); + + /** + * Split a text into a list of strings. Reduce the texts to a string of + * hashes where each Unicode character represents one line. + * @param text String to encode. + * @param lineArray List of unique strings. + * @param lineHash Map of strings to indices. + * @return Encoded string. 
+ */ + private: + std::wstring diff_linesToCharsMunge( + const std::wstring &text, TStringVector &lineArray, + std::unordered_map &lineHash); + + /** + * Rehydrate the text in a diff from a string of line hashes to real lines of + * text. + * @param diffs LinkedList of Diff objects. + * @param lineArray List of unique strings. + */ + private: + void diff_charsToLines(TDiffVector &diffs, const TStringVector &lineArray); + + /** + * Determine the common prefix of two strings. + * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the start of each string. + */ + public: + std::size_t diff_commonPrefix(const std::wstring &text1, + const std::wstring &text2); + std::size_t diff_commonPrefix(const std::string &text1, + const std::string &text2); + + /** + * Determine the common suffix of two strings. + * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the end of each string. + */ + public: + std::size_t diff_commonSuffix(const std::wstring &text1, + const std::wstring &text2); + std::size_t diff_commonSuffix(const std::string &text1, + const std::string &text2); + + /** + * Determine if the suffix of one string is the prefix of another. + * @param text1 First string. + * @param text2 Second string. + * @return The number of characters common to the end of the first + * string and the start of the second string. + */ + protected: + std::size_t diff_commonOverlap(const std::wstring &text1, + const std::wstring &text2); + std::size_t diff_commonOverlap(const std::string &text1, + const std::string &text2); + + /** + * Do the two texts share a substring which is at least half the length of + * the longer text? + * This speedup can produce non-minimal diffs. + * @param text1 First string. + * @param text2 Second string. 
+ * @return Five element String array, containing the prefix of text1, the + * suffix of text1, the prefix of text2, the suffix of text2 and the + * common middle. Or null if there was no match. + */ + protected: + TStringVector diff_halfMatch(const std::wstring &text1, + const std::wstring &text2); + TStringVector diff_halfMatch(const std::string &text1, + const std::string &text2); + + /** + * Does a substring of shorttext exist within longtext such that the + * substring is at least half the length of longtext? + * @param longtext Longer string. + * @param shorttext Shorter string. + * @param i Start index of quarter length substring within longtext. + * @return Five element String array, containing the prefix of longtext, the + * suffix of longtext, the prefix of shorttext, the suffix of shorttext + * and the common middle. Or null if there was no match. + */ + private: + TStringVector diff_halfMatchI(const std::wstring &longtext, + const std::wstring &shorttext, std::size_t i); + TStringVector diff_halfMatchI(const std::string &longtext, + const std::string &shorttext, std::size_t i); + + /** + * Reduce the number of edits by eliminating semantically trivial equalities. + * @param diffs LinkedList of Diff objects. + */ + public: + void diff_cleanupSemantic(TDiffVector &diffs); + + /** + * Look for single edits surrounded on both sides by equalities + * which can be shifted sideways to align the edit to a word boundary. + * e.g: The cat came. -> The cat came. + * @param diffs LinkedList of Diff objects. + */ + public: + void diff_cleanupSemanticLossless(TDiffVector &diffs); + + /** + * Given two strings, compute a score representing whether the internal + * boundary falls on logical boundaries. + * Scores range from 6 (best) to 0 (worst). + * @param one First string. + * @param two Second string. + * @return The score. 
+ */ + private: + int64_t diff_cleanupSemanticScore(const std::wstring &one, + const std::wstring &two); + int64_t diff_cleanupSemanticScore(const std::string &one, + const std::string &two); + + /** + * Reduce the number of edits by eliminating operationally trivial equalities. + * @param diffs LinkedList of Diff objects. + */ + public: + void diff_cleanupEfficiency(TDiffVector &diffs); + + /** + * Reorder and merge like edit sections. Merge equalities. + * Any edit section can move as long as it doesn't cross an equality. + * @param diffs LinkedList of Diff objects. + */ + public: + void diff_cleanupMerge(TDiffVector &diffs); + + /** + * loc is a location in text1, compute and return the equivalent location in + * text2. + * e.g. "The cat" vs "The big cat", 1->1, 5->8 + * @param diffs LinkedList of Diff objects. + * @param loc Location within text1. + * @return Location within text2. + */ + public: + std::size_t diff_xIndex(const TDiffVector &diffs, std::size_t loc); + + /** + * Convert a Diff list into a pretty HTML report. + * @param diffs LinkedList of Diff objects. + * @return HTML representation. + */ + public: + std::wstring diff_prettyHtml(const TDiffVector &diffs); + + /** + * Convert a Diff list into a pretty Console report. Red for delete, and green for insert + * @param diffs LinkedList of Diff objects. + * @return Console representation. + */ + public: + std::wstring diff_prettyConsole(const TDiffVector &diffs); + + /** + * Compute and return the source text (all equalities and deletions). + * @param diffs LinkedList of Diff objects. + * @return Source text. + */ + public: + std::wstring diff_text1(const TDiffVector &diffs); + + /** + * Compute and return the destination text (all equalities and insertions). + * @param diffs LinkedList of Diff objects. + * @return Destination text. 
+ */ + public: + std::wstring diff_text2(const TDiffVector &diffs); + + /** + * Compute the Levenshtein distance; the number of inserted, deleted or + * substituted characters. + * @param diffs LinkedList of Diff objects. + * @return Number of changes. + */ + public: + std::size_t diff_levenshtein(const TDiffVector &diffs); + + /** + * Crush the diff into an encoded string which describes the operations + * required to transform text1 into text2. + * E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. + * Operations are tab-separated. Inserted text is escaped using %xx notation. + * @param diffs Array of diff tuples. + * @return Delta text. + */ + public: + std::wstring diff_toDelta(const TDiffVector &diffs); + + /** + * Given the original text1, and an encoded string which describes the + * operations required to transform text1 into text2, compute the full diff. + * @param text1 Source string for the diff. + * @param delta Delta text. + * @return Array of diff tuples or null if invalid. + * @throws std::wstring If invalid input. + */ + public: + TDiffVector diff_fromDelta(const std::wstring &text1, + const std::wstring &delta); + TDiffVector diff_fromDelta(const std::string &text1, + const std::string &delta); + + // MATCH FUNCTIONS + + /** + * Locate the best instance of 'pattern' in 'text' near 'loc'. + * Returns -1 if no match found. + * @param text The text to search. + * @param pattern The pattern to search for. + * @param loc The location to search around. + * @return Best match index or -1. + */ + public: + std::size_t match_main(const std::wstring &text, const std::wstring &pattern, + std::size_t loc); + std::size_t match_main(const std::string &text, const std::string &pattern, + std::size_t loc); + + /** + * Locate the best instance of 'pattern' in 'text' near 'loc' using the + * Bitap algorithm. Returns -1 if no match found. + * @param text The text to search. + * @param pattern The pattern to search for. 
+ * @param loc The location to search around. + * @return Best match index or -1. + */ + protected: + std::size_t match_bitap(const std::wstring &text, const std::wstring &pattern, + std::size_t loc); + std::size_t match_bitap(const std::string &text, const std::string &pattern, + std::size_t loc); + + /** + * Compute and return the score for a match with e errors and x location. + * @param e Number of errors in match. + * @param x Location of match. + * @param loc Expected location of match. + * @param pattern Pattern being sought. + * @return Overall score for match (0.0 = good, 1.0 = bad). + */ + private: + double match_bitapScore(int64_t e, int64_t x, int64_t loc, + const std::wstring &pattern); + + /** + * Initialise the alphabet for the Bitap algorithm. + * @param pattern The text to encode. + * @return Hash of character locations. + */ + protected: + TCharPosMap match_alphabet(const std::wstring &pattern); + TCharPosMap match_alphabet(const std::string &pattern); + + // PATCH FUNCTIONS + + /** + * Increase the context until it is unique, + * but don't let the pattern expand beyond Match_MaxBits. + * @param patch The patch to grow. + * @param text Source text. + */ + protected: + void patch_addContext(Patch &patch, const std::wstring &text); + void patch_addContext(Patch &patch, const std::string &text); + + /** + * Compute a list of patches to turn text1 into text2. + * A set of diffs will be computed. + * @param text1 Old text. + * @param text2 New text. + * @return LinkedList of Patch objects. + */ + public: + TPatchVector patch_make(const std::wstring &text1, const std::wstring &text2); + TPatchVector patch_make(const std::string &text1, const std::string &text2); + + /** + * Compute a list of patches to turn text1 into text2. + * text1 will be derived from the provided diffs. + * @param diffs Array of diff tuples for text1 to text2. + * @return LinkedList of Patch objects. 
+ */ + public: + TPatchVector patch_make(const TDiffVector &diffs); + + /** + * Compute a list of patches to turn text1 into text2. + * text2 is ignored, diffs are the delta between text1 and text2. + * @param text1 Old text. + * @param text2 Ignored. + * @param diffs Array of diff tuples for text1 to text2. + * @return LinkedList of Patch objects. + * @deprecated Prefer patch_make(const std::wstring &text1, const std::list< + * Diff > &diffs). + */ + public: + TPatchVector patch_make(const std::wstring &text1, const std::wstring &text2, + const TDiffVector &diffs); + TPatchVector patch_make(const std::string &text1, const std::string &text2, + const TDiffVector &diffs); + + /** + * Compute a list of patches to turn text1 into text2. + * text2 is not provided, diffs are the delta between text1 and text2. + * @param text1 Old text. + * @param diffs Array of diff tuples for text1 to text2. + * @return LinkedList of Patch objects. + */ + public: + TPatchVector patch_make(const std::wstring &text1, const TDiffVector &diffs); + TPatchVector patch_make(const std::string &text1, const TDiffVector &diffs); + + /** + * Given an array of patches, return another array that is identical. + * @param patches Array of patch objects. + * @return Array of patch objects. + */ + public: + TPatchVector patch_deepCopy(const TPatchVector &patches); + + /** + * Merge a set of patches onto the text. Return a patched text, as well + * as an array of true/false values indicating which patches were applied. + * @param patches Array of patch objects. + * @param text Old text. + * @return Two element Object array, containing the new text and an array of + * boolean values. + */ + public: + std::pair > patch_apply(TPatchVector patches, + std::wstring text); + std::pair > patch_apply(TPatchVector patches, + std::string text); + + /** + * Add some padding on text start and end so that edges can match something. + * Intended to be called only from within patch_apply. 
+ * @param patches Array of patch objects. + * @return The padding string added to each side. + */ + public: + std::wstring patch_addPadding(TPatchVector &patches); + + /** + * Look through the patches and break up any which are longer than the + * maximum limit of the match algorithm. + * Intended to be called only from within patch_apply. + * @param patches LinkedList of Patch objects. + */ + public: + void patch_splitMax(TPatchVector &patches); + + /** + * Take a list of patches and return a textual representation. + * @param patches List of Patch objects. + * @return Text representation of patches. + */ + public: + std::wstring patch_toText(const TPatchVector &patches); + + /** + * Parse a textual representation of patches and return a List of Patch + * objects. + * @param textline Text representation of patches. + * @return List of Patch objects. + * @throws std::wstring If invalid input. + */ + public: + TPatchVector patch_fromText(const std::wstring &textline); + TPatchVector patch_fromText(const std::string &textline); + + /** + * A safer version of std::wstring.mid(pos). This one returns "" instead of + * null when the postion equals the string length. + * @param str String to take a substring from. + * @param pos Position to start the substring from. + * @return Substring. + */ + private: + static std::wstring safeMid(const std::wstring &str, std::size_t pos); + + /** + * A safer version of std::wstring.mid(pos, len). This one returns "" instead + * of null when the postion equals the string length. + * @param str String to take a substring from. + * @param pos Position to start the substring from. + * @param len Length of substring. + * @return Substring. 
+ */ + private: + static std::wstring safeMid(const std::wstring &str, std::size_t pos, + std::size_t len); +}; + +namespace NUtils { +std::wstring to_wstring(const diff_match_patch::TVariant &variant, + bool doubleQuoteEmpty = false); +std::wstring to_wstring(const Diff &diff, bool doubleQuoteEmpty = false); +std::wstring to_wstring(const Patch &patch, bool doubleQuoteEmpty = false); +} // namespace NUtils +#endif // DIFF_MATCH_PATCH_H diff --git a/cpp17/diff_match_patch_test.cpp b/cpp17/diff_match_patch_test.cpp new file mode 100644 index 0000000..5dc5abd --- /dev/null +++ b/cpp17/diff_match_patch_test.cpp @@ -0,0 +1,1364 @@ +/* + * Diff Match and Patch -- Test Harness + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "diff_match_patch.h" + +#include "diff_match_patch_test.h" +#include "diff_match_patch_utils.h" + +#ifdef USE_GTEST +#include "gtest/gtest.h" +#endif + +#include +#include + +int main(int argc, char **argv) { +#ifdef USE_GTEST + ::testing::InitGoogleTest(&argc, argv); + int retVal = RUN_ALL_TESTS(); +#else + diff_match_patch_test dmp_test; + std::cerr << "Starting diff_match_patch unit tests.\n"; + int retVal = dmp_test.run_all_tests(); + std::cerr << "Done.\n"; +#endif + return retVal; +} + +static wchar_t kZero{0}; +static wchar_t kOne{1}; +static wchar_t kTwo{2}; + +diff_match_patch_test::diff_match_patch_test() {} + +#ifndef USE_GTEST +bool diff_match_patch_test::runTest(std::function test) { + bool retVal = false; + try { + test(); + numPassedTests++; + retVal = true; + } catch (std::string msg) { + std::cerr << "Test failed: " << msg << "\n"; + numFailedTests++; + retVal = false; + } + return retVal; +} + +int diff_match_patch_test::run_all_tests() { + auto startTime = std::chrono::high_resolution_clock::now(); + + runTest(std::bind(&diff_match_patch_test::testDiffCommonPrefix, this)); + runTest(std::bind(&diff_match_patch_test::testDiffCommonSuffix, this)); + runTest(std::bind(&diff_match_patch_test::testDiffCommonOverlap, this)); + runTest(std::bind(&diff_match_patch_test::testDiffHalfmatch, this)); + runTest(std::bind(&diff_match_patch_test::testDiffLinesToChars, this)); + runTest(std::bind(&diff_match_patch_test::testDiffCharsToLines, this)); + runTest(std::bind(&diff_match_patch_test::testDiffCleanupMerge, this)); + runTest( + std::bind(&diff_match_patch_test::testDiffCleanupSemanticLossless, this)); + runTest(std::bind(&diff_match_patch_test::testDiffCleanupSemantic, this)); + runTest(std::bind(&diff_match_patch_test::testDiffCleanupEfficiency, this)); + runTest(std::bind(&diff_match_patch_test::testDiffPrettyHtml, this)); + runTest(std::bind(&diff_match_patch_test::testDiffPrettyConsole, this)); + 
runTest(std::bind(&diff_match_patch_test::testDiffText, this)); + runTest(std::bind(&diff_match_patch_test::testDiffDelta, this)); + runTest(std::bind(&diff_match_patch_test::testDiffXIndex, this)); + runTest(std::bind(&diff_match_patch_test::testDiffLevenshtein, this)); + runTest(std::bind(&diff_match_patch_test::testDiffBisect, this)); + runTest(std::bind(&diff_match_patch_test::testDiffMain, this)); + + runTest(std::bind(&diff_match_patch_test::testMatchAlphabet, this)); + runTest(std::bind(&diff_match_patch_test::testMatchBitap, this)); + runTest(std::bind(&diff_match_patch_test::testMatchMain, this)); + + runTest(std::bind(&diff_match_patch_test::testPatchObj, this)); + runTest(std::bind(&diff_match_patch_test::testPatchFromText, this)); + runTest(std::bind(&diff_match_patch_test::testPatchToText, this)); + runTest(std::bind(&diff_match_patch_test::testPatchAddContext, this)); + runTest(std::bind(&diff_match_patch_test::testPatchMake, this)); + runTest(std::bind(&diff_match_patch_test::testPatchSplitMax, this)); + runTest(std::bind(&diff_match_patch_test::testPatchAddPadding, this)); + runTest(std::bind(&diff_match_patch_test::testPatchApply, this)); + if (numFailedTests == 0) + std::cout << numPassedTests << " Tests Passed\n" + << numFailedTests << " Tests Failed\n"; + else + std::cerr << numPassedTests << " Tests Passed\n" + << numFailedTests << " Tests Failed\n"; + auto endTime = std::chrono::high_resolution_clock::now(); + auto elapsed = + std::chrono::duration_cast(endTime - startTime) + .count(); + std::wcout << "Total time: " << elapsed << " ms\n"; + return (numFailedTests == 0) ? 0 : 1; +} +#endif + +// DIFF TEST FUNCTIONS + +TEST_F(diff_match_patch_test, testDiffCommonPrefix) { + // Detect any common prefix. 
+ assertEquals("diff_commonPrefix: nullptr case.", 0, + dmp.diff_commonPrefix("abc", "xyz")); + + assertEquals("diff_commonPrefix: Non-nullptr case.", 4, + dmp.diff_commonPrefix("1234abcdef", "1234xyz")); + + assertEquals("diff_commonPrefix: Whole case.", 4, + dmp.diff_commonPrefix("1234", "1234xyz")); +} + +TEST_F(diff_match_patch_test, testDiffCommonSuffix) { + // Detect any common suffix. + assertEquals("diff_commonSuffix: nullptr case.", 0, + dmp.diff_commonSuffix("abc", "xyz")); + + assertEquals("diff_commonSuffix: Non-nullptr case.", 4, + dmp.diff_commonSuffix("abcdef1234", "xyz1234")); + + assertEquals("diff_commonSuffix: Whole case.", 4, + dmp.diff_commonSuffix("1234", "xyz1234")); +} + +TEST_F(diff_match_patch_test, testDiffCommonOverlap) { + // Detect any suffix/prefix overlap. + assertEquals("diff_commonOverlap: nullptr case.", 0, + dmp.diff_commonOverlap("", "abcd")); + + assertEquals("diff_commonOverlap: Whole case.", 3, + dmp.diff_commonOverlap("abc", "abcd")); + + assertEquals("diff_commonOverlap: No overlap.", 0, + dmp.diff_commonOverlap("123456", "abcd")); + + assertEquals("diff_commonOverlap: Overlap.", 3, + dmp.diff_commonOverlap("123456xxx", "xxxabcd")); + + // Some overly clever languages (C#) may treat ligatures as equal to their + // component letters. E.g. U+FB01 == 'fi' + assertEquals("diff_commonOverlap: Unicode.", 0, + dmp.diff_commonOverlap(L"fi", std::wstring(L"\ufb01i"))); +} + +TEST_F(diff_match_patch_test, testDiffHalfmatch) { + // Detect a halfmatch. 
+ dmp.Diff_Timeout = 1; + assertEmpty("diff_halfMatch: No match #1.", + dmp.diff_halfMatch("1234567890", "abcdef")); + + assertEmpty("diff_halfMatch: No match #2.", + dmp.diff_halfMatch("12345", "23")); + + assertEquals("diff_halfMatch: Single Match #1.", + TStringVector({L"12", L"90", L"a", L"z", L"345678"}), + dmp.diff_halfMatch("1234567890", "a345678z")); + + assertEquals("diff_halfMatch: Single Match #2.", + TStringVector({L"a", L"z", L"12", L"90", L"345678"}), + dmp.diff_halfMatch("a345678z", "1234567890")); + + assertEquals("diff_halfMatch: Single Match #3.", + TStringVector({L"abc", L"z", L"1234", L"0", L"56789"}), + dmp.diff_halfMatch("abc56789z", "1234567890")); + + assertEquals("diff_halfMatch: Single Match #4.", + TStringVector({L"a", L"xyz", L"1", L"7890", L"23456"}), + dmp.diff_halfMatch("a23456xyz", "1234567890")); + + assertEquals( + "diff_halfMatch: Multiple Matches #1.", + TStringVector({L"12123", L"123121", L"a", L"z", L"1234123451234"}), + dmp.diff_halfMatch("121231234123451234123121", "a1234123451234z")); + + assertEquals( + "diff_halfMatch: Multiple Matches #2.", + TStringVector({L"", L"-=-=-=-=-=", L"x", L"", L"x-=-=-=-=-=-=-="}), + dmp.diff_halfMatch("x-=-=-=-=-=-=-=-=-=-=-=-=", "xx-=-=-=-=-=-=-=")); + + assertEquals( + "diff_halfMatch: Multiple Matches #3.", + TStringVector({L"-=-=-=-=-=", L"", L"", L"y", L"-=-=-=-=-=-=-=y"}), + dmp.diff_halfMatch("-=-=-=-=-=-=-=-=-=-=-=-=y", "-=-=-=-=-=-=-=yy")); + + // Optimal diff would be -q+x=H-i+e=lloHe+Hu=llo-Hew+y not + // -qHillo+x=HelloHe-w+Hulloy + assertEquals("diff_halfMatch: Non-optimal halfmatch.", + TStringVector({L"qHillo", L"w", L"x", L"Hulloy", L"HelloHe"}), + dmp.diff_halfMatch("qHilloHelloHew", "xHelloHeHulloy")); + + dmp.Diff_Timeout = 0; + assertEmpty("diff_halfMatch: Optimal no halfmatch.", + dmp.diff_halfMatch(L"qHilloHelloHew", L"xHelloHeHulloy")); +} + +TEST_F(diff_match_patch_test, testDiffLinesToChars) { + // Convert lines down to characters. 
+ TStringVector tmpVector = TStringVector({L"", L"alpha\n", L"beta\n"}); + TVariantVector tmpVarList; + tmpVarList.emplace_back( + NUtils::to_wstring({1, 2, 1})); //(("\u0001\u0002\u0001")); + tmpVarList.emplace_back( + NUtils::to_wstring({2, 1, 2})); // (("\u0002\u0001\u0002")); + tmpVarList.emplace_back(tmpVector); + assertEquals( + "diff_linesToChars:", tmpVarList, + dmp.diff_linesToChars("alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n")); + + tmpVector.clear(); + tmpVarList.clear(); + tmpVector.emplace_back(L""); + tmpVector.emplace_back(L"alpha\r\n"); + tmpVector.emplace_back(L"beta\r\n"); + tmpVector.emplace_back(L"\r\n"); + tmpVarList.emplace_back(std::wstring()); + tmpVarList.emplace_back( + NUtils::to_wstring({1, 2, 3, 3})); // (("\u0001\u0002\u0003\u0003")); + tmpVarList.emplace_back(tmpVector); + assertEquals("diff_linesToChars:", tmpVarList, + dmp.diff_linesToChars("", "alpha\r\nbeta\r\n\r\n\r\n")); + + tmpVector.clear(); + tmpVarList.clear(); + tmpVector.emplace_back(L""); + tmpVector.emplace_back(L"a"); + tmpVector.emplace_back(L"b"); + tmpVarList.emplace_back(NUtils::to_wstring(1)); // (("\u0001")); + tmpVarList.emplace_back(NUtils::to_wstring(2)); // (("\u0002")); + tmpVarList.emplace_back(tmpVector); + assertEquals("diff_linesToChars:", tmpVarList, + dmp.diff_linesToChars("a", "b")); + + // More than 256 to reveal any 8-bit limitations. 
+ int n = 300; + tmpVector.clear(); + tmpVarList.clear(); + std::wstring lines; + std::wstring chars; + for (int x = 1; x < n + 1; x++) { + tmpVector.emplace_back(std::to_wstring(x) + L"\n"); + lines += std::to_wstring(x) + L"\n"; + chars += NUtils::to_wstring(x); + } + assertEquals("diff_linesToChars: More than 256 (setup).", n, + tmpVector.size()); + assertEquals("diff_linesToChars: More than 256 (setup).", n, chars.length()); + tmpVector.emplace(tmpVector.begin(), L""); + tmpVarList.emplace_back(chars); + tmpVarList.emplace_back(std::wstring()); + tmpVarList.emplace_back(tmpVector); + assertEquals("diff_linesToChars: More than 256.", tmpVarList, + dmp.diff_linesToChars(lines, {})); +} + +TEST_F(diff_match_patch_test, testDiffCharsToLines) { + // First check that Diff equality works. + assertTrue("diff_charsToLines:", Diff(EQUAL, "a") == Diff(EQUAL, "a")); + + assertEquals("diff_charsToLines:", Diff(EQUAL, "a"), Diff(EQUAL, "a")); + + // Convert chars up to lines. + TDiffVector diffs; + diffs.emplace_back(EQUAL, + NUtils::to_wstring({1, 2, 1})); // ("\u0001\u0002\u0001"); + diffs.emplace_back(INSERT, + NUtils::to_wstring({2, 1, 2})); // ("\u0002\u0001\u0002"); + TStringVector tmpVector; + tmpVector.emplace_back(L""); + tmpVector.emplace_back(L"alpha\n"); + tmpVector.emplace_back(L"beta\n"); + dmp.diff_charsToLines(diffs, tmpVector); + assertEquals("diff_charsToLines:", + TDiffVector({Diff(EQUAL, "alpha\nbeta\nalpha\n"), + Diff(INSERT, "beta\nalpha\nbeta\n")}), + diffs); + + // More than 256 to reveal any 8-bit limitations. 
+ int n = 300; + tmpVector.clear(); + std::vector tmpVarList; + std::wstring lines; + std::wstring chars; + for (int x = 1; x < n + 1; x++) { + tmpVector.emplace_back(std::to_wstring(x) + L"\n"); + lines += std::to_wstring(x) + L"\n"; + chars += NUtils::to_wstring(x); + } + assertEquals("diff_linesToChars: More than 256 (setup).", n, + tmpVector.size()); + assertEquals("diff_linesToChars: More than 256 (setup).", n, chars.length()); + tmpVector.emplace(tmpVector.begin(), L""); + diffs = {Diff(DELETE, chars)}; + dmp.diff_charsToLines(diffs, tmpVector); + assertEquals("diff_charsToLines: More than 256.", + TDiffVector({Diff(DELETE, lines)}), diffs); +} + +TEST_F(diff_match_patch_test, testDiffCleanupMerge) { + // Cleanup a messy diff. + TDiffVector diffs; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: nullptr case.", TDiffVector(), diffs); + + diffs = {Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(INSERT, "c")}; + dmp.diff_cleanupMerge(diffs); + assertEquals( + "diff_cleanupMerge: No change case.", + TDiffVector({Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(INSERT, "c")}), + diffs); + + diffs = {Diff(EQUAL, "a"), Diff(EQUAL, "b"), Diff(EQUAL, "c")}; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Merge equalities.", + TDiffVector({Diff(EQUAL, "abc")}), diffs); + + diffs = {Diff(DELETE, "a"), Diff(DELETE, "b"), Diff(DELETE, "c")}; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Merge deletions.", + TDiffVector({Diff(DELETE, "abc")}), diffs); + + diffs = {Diff(INSERT, "a"), Diff(INSERT, "b"), Diff(INSERT, "c")}; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Merge insertions.", + TDiffVector({Diff(INSERT, "abc")}), diffs); + + diffs = {Diff(DELETE, "a"), Diff(INSERT, "b"), Diff(DELETE, "c"), + Diff(INSERT, "d"), Diff(EQUAL, "e"), Diff(EQUAL, "f")}; + dmp.diff_cleanupMerge(diffs); + assertEquals( + "diff_cleanupMerge: Merge interweave.", + TDiffVector({Diff(DELETE, "ac"), Diff(INSERT, "bd"), 
Diff(EQUAL, "ef")}), + diffs); + + diffs = {Diff(DELETE, "a"), Diff(INSERT, "abc"), Diff(DELETE, "dc")}; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Prefix and suffix detection.", + TDiffVector({Diff(EQUAL, "a"), Diff(DELETE, "d"), + Diff(INSERT, "b"), Diff(EQUAL, "c")}), + diffs); + + diffs = {Diff(EQUAL, "x"), Diff(DELETE, "a"), Diff(INSERT, "abc"), + Diff(DELETE, "dc"), Diff(EQUAL, "y")}; + dmp.diff_cleanupMerge(diffs); + assertEquals( + "diff_cleanupMerge: Prefix and suffix detection with equalities.", + TDiffVector({Diff(EQUAL, "xa"), Diff(DELETE, "d"), Diff(INSERT, "b"), + Diff(EQUAL, "cy")}), + diffs); + + diffs = {Diff(EQUAL, "a"), Diff(INSERT, "ba"), Diff(EQUAL, "c")}; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Slide edit left.", + TDiffVector({Diff(INSERT, "ab"), Diff(EQUAL, "ac")}), diffs); + + diffs = {Diff(EQUAL, "c"), Diff(INSERT, "ab"), Diff(EQUAL, "a")}; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Slide edit right.", + TDiffVector({Diff(EQUAL, "ca"), Diff(INSERT, "ba")}), diffs); + + diffs = {Diff(EQUAL, "a"), Diff(DELETE, "b"), Diff(EQUAL, "c"), + Diff(DELETE, "ac"), Diff(EQUAL, "x")}; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Slide edit left recursive.", + TDiffVector({Diff(DELETE, "abc"), Diff(EQUAL, "acx")}), diffs); + + diffs = {Diff(EQUAL, "x"), Diff(DELETE, "ca"), Diff(EQUAL, "c"), + Diff(DELETE, "b"), Diff(EQUAL, "a")}; + dmp.diff_cleanupMerge(diffs); + assertEquals("diff_cleanupMerge: Slide edit right recursive.", + TDiffVector({Diff(EQUAL, "xca"), Diff(DELETE, "cba")}), diffs); +} + +TEST_F(diff_match_patch_test, testDiffCleanupSemanticLossless) { + // Slide diffs to match logical boundaries. 
+ auto diffs = TDiffVector(); + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemantic: nullptr case.", TDiffVector(), diffs); + + diffs = {Diff(EQUAL, "AAA\r\n\r\nBBB"), Diff(INSERT, "\r\nDDD\r\n\r\nBBB"), + Diff(EQUAL, "\r\nEEE")}; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemanticLossless: Blank lines.", + TDiffVector({Diff(EQUAL, "AAA\r\n\r\n"), + Diff(INSERT, "BBB\r\nDDD\r\n\r\n"), + Diff(EQUAL, "BBB\r\nEEE")}), + diffs); + + diffs = {Diff(EQUAL, "AAA\r\nBBB"), Diff(INSERT, " DDD\r\nBBB"), + Diff(EQUAL, " EEE")}; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemanticLossless: Line boundaries.", + TDiffVector({Diff(EQUAL, "AAA\r\n"), Diff(INSERT, "BBB DDD\r\n"), + Diff(EQUAL, "BBB EEE")}), + diffs); + + diffs = {Diff(EQUAL, "The c"), Diff(INSERT, "ow and the c"), + Diff(EQUAL, "at.")}; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemantic: Word boundaries.", + TDiffVector({Diff(EQUAL, "The "), Diff(INSERT, "cow and the "), + Diff(EQUAL, "cat.")}), + diffs); + + diffs = {Diff(EQUAL, "The-c"), Diff(INSERT, "ow-and-the-c"), + Diff(EQUAL, "at.")}; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemantic: Alphanumeric boundaries.", + TDiffVector({Diff(EQUAL, "The-"), Diff(INSERT, "cow-and-the-"), + Diff(EQUAL, "cat.")}), + diffs); + + diffs = {Diff(EQUAL, "a"), Diff(DELETE, "a"), Diff(EQUAL, "ax")}; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemantic: Hitting the start.", + TDiffVector({Diff(DELETE, "a"), Diff(EQUAL, "aax")}), diffs); + + diffs = {Diff(EQUAL, "xa"), Diff(DELETE, "a"), Diff(EQUAL, "a")}; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemantic: Hitting the end.", + TDiffVector({Diff(EQUAL, "xaa"), Diff(DELETE, "a")}), diffs); + + diffs = {Diff(EQUAL, "The xxx. The "), Diff(INSERT, "zzz. 
The "), + Diff(EQUAL, "yyy.")}; + dmp.diff_cleanupSemanticLossless(diffs); + assertEquals("diff_cleanupSemantic: Sentence boundaries.", + TDiffVector({Diff(EQUAL, "The xxx."), Diff(INSERT, " The zzz."), + Diff(EQUAL, " The yyy.")}), + diffs); +} + +TEST_F(diff_match_patch_test, testDiffCleanupSemantic) { + // Cleanup semantically trivial equalities. + auto diffs = TDiffVector(); + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: nullptr case.", TDiffVector(), diffs); + + diffs = {Diff(DELETE, "ab"), Diff(INSERT, "cd"), Diff(EQUAL, "12"), + Diff(DELETE, "e")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: No elimination #1.", + TDiffVector({Diff(DELETE, "ab"), Diff(INSERT, "cd"), + Diff(EQUAL, "12"), Diff(DELETE, "e")}), + diffs); + + diffs = {Diff(DELETE, "abc"), Diff(INSERT, "ABC"), Diff(EQUAL, "1234"), + Diff(DELETE, "wxyz")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: No elimination #2.", + TDiffVector({Diff(DELETE, "abc"), Diff(INSERT, "ABC"), + Diff(EQUAL, "1234"), Diff(DELETE, "wxyz")}), + diffs); + + diffs = {Diff(DELETE, "a"), Diff(EQUAL, "b"), Diff(DELETE, "c")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Simple elimination.", + TDiffVector({Diff(DELETE, "abc"), Diff(INSERT, "b")}), diffs); + + diffs = {Diff(DELETE, "ab"), Diff(EQUAL, "cd"), Diff(DELETE, "e"), + Diff(EQUAL, "f"), Diff(INSERT, "g")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Backpass elimination.", + TDiffVector({Diff(DELETE, "abcdef"), Diff(INSERT, "cdfg")}), + diffs); + + diffs = {Diff(INSERT, "1"), Diff(EQUAL, "A"), Diff(DELETE, "B"), + Diff(INSERT, "2"), Diff(EQUAL, "_"), Diff(INSERT, "1"), + Diff(EQUAL, "A"), Diff(DELETE, "B"), Diff(INSERT, "2")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Multiple elimination.", + TDiffVector({Diff(DELETE, "AB_AB"), Diff(INSERT, "1A2_1A2")}), + diffs); + + diffs = {Diff(EQUAL, "The 
c"), Diff(DELETE, "ow and the c"), + Diff(EQUAL, "at.")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Word boundaries.", + TDiffVector({Diff(EQUAL, "The "), Diff(DELETE, "cow and the "), + Diff(EQUAL, "cat.")}), + diffs); + + diffs = {Diff(DELETE, "abcxx"), Diff(INSERT, "xxdef")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: No overlap elimination.", + TDiffVector({Diff(DELETE, "abcxx"), Diff(INSERT, "xxdef")}), + diffs); + + diffs = {Diff(DELETE, "abcxxx"), Diff(INSERT, "xxxdef")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Overlap elimination.", + TDiffVector({Diff(DELETE, "abc"), Diff(EQUAL, "xxx"), + Diff(INSERT, "def")}), + diffs); + + diffs = {Diff(DELETE, "xxxabc"), Diff(INSERT, "defxxx")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals("diff_cleanupSemantic: Reverse overlap elimination.", + TDiffVector({Diff(INSERT, "def"), Diff(EQUAL, "xxx"), + Diff(DELETE, "abc")}), + diffs); + + diffs = {Diff(DELETE, "abcd1212"), Diff(INSERT, "1212efghi"), + Diff(EQUAL, "----"), Diff(DELETE, "A3"), Diff(INSERT, "3BC")}; + dmp.diff_cleanupSemantic(diffs); + assertEquals( + "diff_cleanupSemantic: Two overlap eliminations.", + TDiffVector({Diff(DELETE, "abcd"), Diff(EQUAL, "1212"), + Diff(INSERT, "efghi"), Diff(EQUAL, "----"), + Diff(DELETE, "A"), Diff(EQUAL, "3"), Diff(INSERT, "BC")}), + diffs); +} + +TEST_F(diff_match_patch_test, testDiffCleanupEfficiency) { + // Cleanup operationally trivial equalities. 
+ dmp.Diff_EditCost = 4; + auto diffs = TDiffVector(); + dmp.diff_cleanupEfficiency(diffs); + assertEquals("diff_cleanupEfficiency: nullptr case.", TDiffVector(), diffs); + + diffs = {Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), + Diff(DELETE, "cd"), Diff(INSERT, "34")}; + dmp.diff_cleanupEfficiency(diffs); + assertEquals( + "diff_cleanupEfficiency: No elimination.", + TDiffVector({Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), + Diff(DELETE, "cd"), Diff(INSERT, "34")}), + diffs); + + diffs = {Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "xyz"), + Diff(DELETE, "cd"), Diff(INSERT, "34")}; + dmp.diff_cleanupEfficiency(diffs); + assertEquals("diff_cleanupEfficiency: Four-edit elimination.", + TDiffVector({Diff(DELETE, "abxyzcd"), Diff(INSERT, "12xyz34")}), + diffs); + + diffs = {Diff(INSERT, "12"), Diff(EQUAL, "x"), Diff(DELETE, "cd"), + Diff(INSERT, "34")}; + dmp.diff_cleanupEfficiency(diffs); + assertEquals("diff_cleanupEfficiency: Three-edit elimination.", + TDiffVector({Diff(DELETE, "xcd"), Diff(INSERT, "12x34")}), + diffs); + + diffs = {Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "xy"), + Diff(INSERT, "34"), Diff(EQUAL, "z"), Diff(DELETE, "cd"), + Diff(INSERT, "56")}; + dmp.diff_cleanupEfficiency(diffs); + assertEquals( + "diff_cleanupEfficiency: Backpass elimination.", + TDiffVector({Diff(DELETE, "abxyzcd"), Diff(INSERT, "12xy34z56")}), diffs); + + dmp.Diff_EditCost = 5; + diffs = {Diff(DELETE, "ab"), Diff(INSERT, "12"), Diff(EQUAL, "wxyz"), + Diff(DELETE, "cd"), Diff(INSERT, "34")}; + dmp.diff_cleanupEfficiency(diffs); + assertEquals( + "diff_cleanupEfficiency: High cost elimination.", + TDiffVector({Diff(DELETE, "abwxyzcd"), Diff(INSERT, "12wxyz34")}), diffs); + dmp.Diff_EditCost = 4; +} + +TEST_F(diff_match_patch_test, testDiffPrettyHtml) { + // Pretty print. + auto diffs = TDiffVector( + {Diff(EQUAL, "a\n"), Diff(DELETE, "b"), Diff(INSERT, "c&d")}); + assertEquals("diff_prettyHtml:", + L"
<B>b</B>c&d", + dmp.diff_prettyHtml(diffs)); +} + +TEST_F(diff_match_patch_test, testDiffPrettyConsole) { + // Pretty print. + static std::wstring kRed{L"\033[0;31m"}; + static std::wstring kGreen{L"\033[0;32m"}; + static std::wstring kYellow{L"\033[0;33m"}; + static std::wstring kReset{L"\033[m"}; + static std::wstring kEOL{NUtils::fromPercentEncoding(L"%C2%B6") + L"\n"}; + + auto diffs = TDiffVector( + {Diff(EQUAL, "a\n"), Diff(DELETE, "b"), Diff(INSERT, "c&d")}); + auto results = dmp.diff_prettyConsole(diffs); + assertEquals( + "diff_prettyConsole:", + L"a" + kEOL + kRed + L"b" + kReset + kGreen + L"c&d" + kReset, + results); +} + +TEST_F(diff_match_patch_test, testDiffText) { + // Compute the source and destination texts. + auto diffs = {Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), + Diff(EQUAL, " over "), Diff(DELETE, "the"), Diff(INSERT, "a"), + Diff(EQUAL, " lazy")}; + assertEquals("diff_text1:", L"jumps over the lazy", dmp.diff_text1(diffs)); + assertEquals("diff_text2:", L"jumped over a lazy", dmp.diff_text2(diffs)); +} + +TEST_F(diff_match_patch_test, testDiffDelta) { + // Convert a diff into delta string. + auto diffs = TDiffVector({Diff(EQUAL, "jump"), Diff(DELETE, "s"), + Diff(INSERT, "ed"), Diff(EQUAL, " over "), + Diff(DELETE, "the"), Diff(INSERT, "a"), + Diff(EQUAL, " lazy"), Diff(INSERT, "old dog")}); + std::wstring text1 = dmp.diff_text1(diffs); + assertEquals("diff_text1: Base text.", L"jumps over the lazy", text1); + + std::wstring delta = dmp.diff_toDelta(diffs); + std::wstring golden = L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog"; + assertEquals("diff_toDelta:", L"=4\t-1\t+ed\t=6\t-3\t+a\t=5\t+old dog", + delta); + + // Convert delta string into a diff. + assertEquals("diff_fromDelta: Normal.", diffs, + dmp.diff_fromDelta(text1, delta)); + + // Generates error (19 < 20). + assertThrow("diff_fromDelta: Too long.", + dmp.diff_fromDelta(text1 + L"x", delta), std::wstring); + + // Generates error (19 > 18). 
+ assertThrow("diff_fromDelta: Too short.", + dmp.diff_fromDelta(text1.substr(1), delta), std::wstring); + + // Generates error (%c3%xy invalid Unicode). + assertThrow("diff_fromDelta: Invalid character.", + dmp.diff_fromDelta("", "+%c3%xy"), std::wstring); + + // Test deltas with special characters. + diffs = { + Diff(EQUAL, std::wstring(L"\u0680 ") + kZero + std::wstring(L" \t %")), + Diff(DELETE, std::wstring(L"\u0681 ") + kOne + std::wstring(L" \n ^")), + Diff(INSERT, std::wstring(L"\u0682 ") + kTwo + std::wstring(L" \\ |"))}; + + text1 = dmp.diff_text1(diffs); + golden = std::wstring(L"\u0680 ") + kZero + std::wstring(L" \t %\u0681 ") + + kOne + std::wstring(L" \n ^"); + assertEquals("diff_text1: Unicode text", golden, text1); + + delta = dmp.diff_toDelta(diffs); + assertEquals("diff_toDelta: Unicode", L"=7\t-7\t+%DA%82 %02 %5C %7C", delta); + + assertEquals("diff_fromDelta: Unicode", diffs, + dmp.diff_fromDelta(text1, delta)); + + // Verify pool of unchanged characters. + diffs = { + Diff(INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ")}; + std::wstring text2 = dmp.diff_text2(diffs); + assertEquals("diff_text2: Unchanged characters.", + L"A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", text2); + + delta = dmp.diff_toDelta(diffs); + assertEquals("diff_toDelta: Unchanged characters.", + L"+A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ", + delta); + + // Convert delta string into a diff. + assertEquals("diff_fromDelta: Unchanged characters.", diffs, + dmp.diff_fromDelta({}, delta)); +} + +TEST_F(diff_match_patch_test, testDiffXIndex) { + // Translate a location in text1 to text2. 
+ auto diffs = TDiffVector( + {Diff(DELETE, "a"), Diff(INSERT, "1234"), Diff(EQUAL, "xyz")}); + assertEquals("diff_xIndex: Translation on equality.", 5, + dmp.diff_xIndex(diffs, 2)); + + diffs = {Diff(EQUAL, "a"), Diff(DELETE, "1234"), Diff(EQUAL, "xyz")}; + assertEquals("diff_xIndex: Translation on deletion.", 1, + dmp.diff_xIndex(diffs, 3)); +} + +TEST_F(diff_match_patch_test, testDiffLevenshtein) { + auto diffs = TDiffVector( + {Diff(DELETE, "abc"), Diff(INSERT, "1234"), Diff(EQUAL, "xyz")}); + assertEquals("diff_levenshtein: Trailing equality.", 4, + dmp.diff_levenshtein(diffs)); + + diffs = {Diff(EQUAL, "xyz"), Diff(DELETE, "abc"), Diff(INSERT, "1234")}; + assertEquals("diff_levenshtein: Leading equality.", 4, + dmp.diff_levenshtein(diffs)); + + diffs = {Diff(DELETE, "abc"), Diff(EQUAL, "xyz"), Diff(INSERT, "1234")}; + assertEquals("diff_levenshtein: Middle equality.", 7, + dmp.diff_levenshtein(diffs)); +} + +TEST_F(diff_match_patch_test, testDiffBisect) { + // Normal. + std::wstring a = L"cat"; + std::wstring b = L"map"; + // Since the resulting diff hasn't been normalized, it would be ok if + // the insertion and deletion pairs are swapped. + // If the order changes, tweak this test as required. + auto diffs = + TDiffVector({Diff(DELETE, "c"), Diff(INSERT, "m"), Diff(EQUAL, "a"), + Diff(DELETE, "t"), Diff(INSERT, "p")}); + auto results = dmp.diff_bisect(a, b, std::numeric_limits::max()); + assertEquals("diff_bisect: Normal.", diffs, results); + + // Timeout. + diffs = {Diff(DELETE, "cat"), Diff(INSERT, "map")}; + results = dmp.diff_bisect(a, b, 0); + assertEquals("diff_bisect: Timeout.", diffs, results); +} + +TEST_F(diff_match_patch_test, testDiffMain) { + // Perform a trivial diff. 
+ auto diffs = TDiffVector(); + assertEquals("diff_main: nullptr case.", diffs, dmp.diff_main("", "", false)); + + diffs = {Diff(DELETE, "abc")}; + assertEquals("diff_main: RHS side nullptr case.", diffs, + dmp.diff_main("abc", "", false)); + + diffs = {Diff(INSERT, "abc")}; + assertEquals("diff_main: LHS side nullptr case.", diffs, + dmp.diff_main("", "abc", false)); + + diffs = {Diff(EQUAL, "abc")}; + assertEquals("diff_main: Equality.", diffs, + dmp.diff_main("abc", "abc", false)); + + diffs = {Diff(EQUAL, "ab"), Diff(INSERT, "123"), Diff(EQUAL, "c")}; + assertEquals("diff_main: Simple insertion.", diffs, + dmp.diff_main("abc", "ab123c", false)); + + diffs = {Diff(EQUAL, "a"), Diff(DELETE, "123"), Diff(EQUAL, "bc")}; + assertEquals("diff_main: Simple deletion.", diffs, + dmp.diff_main("a123bc", "abc", false)); + + diffs = {Diff(EQUAL, "a"), Diff(INSERT, "123"), Diff(EQUAL, "b"), + Diff(INSERT, "456"), Diff(EQUAL, "c")}; + assertEquals("diff_main: Two insertions.", diffs, + dmp.diff_main("abc", "a123b456c", false)); + + diffs = {Diff(EQUAL, "a"), Diff(DELETE, "123"), Diff(EQUAL, "b"), + Diff(DELETE, "456"), Diff(EQUAL, "c")}; + assertEquals("diff_main: Two deletions.", diffs, + dmp.diff_main("a123b456c", "abc", false)); + + // Perform a real diff. + // Switch off the timeout. 
+ dmp.Diff_Timeout = 0; + diffs = {Diff(DELETE, "a"), Diff(INSERT, "b")}; + assertEquals("diff_main: Simple case #1.", diffs, + dmp.diff_main("a", "b", false)); + + diffs = {Diff(DELETE, "Apple"), Diff(INSERT, "Banana"), + Diff(EQUAL, "s are a"), Diff(INSERT, "lso"), Diff(EQUAL, " fruit.")}; + assertEquals( + "diff_main: Simple case #2.", diffs, + dmp.diff_main("Apples are a fruit.", "Bananas are also fruit.", false)); + + diffs = {Diff(DELETE, "a"), Diff(INSERT, L"\u0680"), Diff(EQUAL, "x"), + Diff(DELETE, "\t"), Diff(INSERT, NUtils::to_wstring(kZero))}; + assertEquals("diff_main: Simple case #3.", diffs, + dmp.diff_main(L"ax\t", std::wstring(L"\u0680x") + kZero, false)); + + diffs = {Diff(DELETE, "1"), Diff(EQUAL, "a"), Diff(DELETE, "y"), + Diff(EQUAL, "b"), Diff(DELETE, "2"), Diff(INSERT, "xab")}; + assertEquals("diff_main: Overlap #1.", diffs, + dmp.diff_main("1ayb2", "abxab", false)); + + diffs = {Diff(INSERT, "xaxcx"), Diff(EQUAL, "abc"), Diff(DELETE, "y")}; + assertEquals("diff_main: Overlap #2.", diffs, + dmp.diff_main("abcy", "xaxcxabc", false)); + + diffs = {Diff(DELETE, "ABCD"), + Diff(EQUAL, "a"), + Diff(DELETE, "="), + Diff(INSERT, "-"), + Diff(EQUAL, "bcd"), + Diff(DELETE, "="), + Diff(INSERT, "-"), + Diff(EQUAL, "efghijklmnopqrs"), + Diff(DELETE, "EFGHIJKLMNOefg")}; + assertEquals("diff_main: Overlap #3.", diffs, + dmp.diff_main("ABCDa=bcd=efghijklmnopqrsEFGHIJKLMNOefg", + "a-bcd-efghijklmnopqrs", false)); + + diffs = {Diff(INSERT, " "), Diff(EQUAL, "a"), Diff(INSERT, "nd"), + Diff(EQUAL, " [[Pennsylvania]]"), Diff(DELETE, " and [[New")}; + assertEquals("diff_main: Large equality.", diffs, + dmp.diff_main("a [[Pennsylvania]] and [[New", + " and [[Pennsylvania]]", false)); + + dmp.Diff_Timeout = 0.1f; // 100ms + // This test may 'fail' on extremely fast computers. If so, just increase the + // text lengths. 
+ std::wstring a = + L"`Twas brillig, and the slithy toves\nDid gyre and gimble in the " + L"wabe:\nAll mimsy were the borogoves,\nAnd the mome raths outgrabe.\n"; + std::wstring b = + L"I am the very model of a modern major general,\nI've information " + L"vegetable, animal, and mineral,\nI know the kings of England, and I " + L"quote the fights historical,\nFrom Marathon to Waterloo, in order " + L"categorical.\n"; + // Increase the text lengths by 1024 times to ensure a timeout. + for (int x = 0; x < 10; x++) { + a = a + a; + b = b + b; + } + clock_t startTime = clock(); + dmp.diff_main(a, b); + clock_t endTime = clock(); + // Test that we took at least the timeout period. + assertTrue("diff_main: Timeout min.", + (dmp.Diff_Timeout * CLOCKS_PER_SEC) <= (endTime - startTime)); + // Test that we didn't take forever (be forgiving). + // Theoretically this test could fail very occasionally if the + // OS task swaps or locks up for a second at the wrong moment. + // The Java port overruns by ~80% and so allows 0.5s there; this port + // keeps the upper limit at twice the timeout (0.2s for a 0.1s timeout). + assertTrue("diff_main: Timeout max.", + (dmp.Diff_Timeout * CLOCKS_PER_SEC * 2) > (endTime - startTime)); + dmp.Diff_Timeout = 0; + + // Test the linemode speedup. + // Must be long to pass the 100 char cutoff.
+ a = L"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890" + L"\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n123456789" + L"0\n1234567890\n"; + b = L"abcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij" + L"\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghij\nabcdefghi" + L"j\nabcdefghij\n"; + assertEquals("diff_main: Simple line-mode.", dmp.diff_main(a, b, true), + dmp.diff_main(a, b, false)); + + a = L"12345678901234567890123456789012345678901234567890123456789012345678901" + L"23456789012345678901234567890123456789012345678901234567890"; + b = L"abcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghija" + L"bcdefghijabcdefghijabcdefghijabcdefghijabcdefghijabcdefghij"; + assertEquals("diff_main: Single line-mode.", dmp.diff_main(a, b, true), + dmp.diff_main(a, b, false)); + + a = L"1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890" + L"\n1234567890\n1234567890\n1234567890\n1234567890\n1234567890\n123456789" + L"0\n1234567890\n"; + b = L"abcdefghij\n1234567890\n1234567890\n1234567890\nabcdefghij\n1234567890" + L"\n1234567890\n1234567890\nabcdefghij\n1234567890\n1234567890\n123456789" + L"0\nabcdefghij\n"; + TStringVector texts_linemode = diff_rebuildtexts(dmp.diff_main(a, b, true)); + TStringVector texts_textmode = diff_rebuildtexts(dmp.diff_main(a, b, false)); + assertEquals("diff_main: Overlap line-mode.", texts_textmode, texts_linemode); +} + +// MATCH TEST FUNCTIONS + +TEST_F(diff_match_patch_test, testMatchAlphabet) { + // Initialise the bitmasks for Bitap. 
+ TCharPosMap bitmask; + bitmask['a'] = 4; + bitmask['b'] = 2; + bitmask['c'] = 1; + assertEquals("match_alphabet: Unique.", bitmask, dmp.match_alphabet("abc")); + + bitmask = TCharPosMap(); + bitmask['a'] = 37; + bitmask['b'] = 18; + bitmask['c'] = 8; + assertEquals("match_alphabet: Duplicates.", bitmask, + dmp.match_alphabet("abcaba")); +} + +TEST_F(diff_match_patch_test, testMatchBitap) { + // Bitap algorithm. + dmp.Match_Distance = 100; + dmp.Match_Threshold = 0.5f; + assertEquals("match_bitap: Exact match #1.", 5, + dmp.match_bitap("abcdefghijk", "fgh", 5)); + + assertEquals("match_bitap: Exact match #2.", 5, + dmp.match_bitap("abcdefghijk", "fgh", 0)); + + assertEquals("match_bitap: Fuzzy match #1.", 4, + dmp.match_bitap("abcdefghijk", "efxhi", 0)); + + assertEquals("match_bitap: Fuzzy match #2.", 2, + dmp.match_bitap("abcdefghijk", "cdefxyhijk", 5)); + + assertEquals("match_bitap: Fuzzy match #3.", -1, + dmp.match_bitap("abcdefghijk", "bxy", 1)); + + assertEquals("match_bitap: Overflow.", 2, + dmp.match_bitap("123456789xx0", "3456789x0", 2)); + + assertEquals("match_bitap: Before start match.", 0, + dmp.match_bitap("abcdef", "xxabc", 4)); + + assertEquals("match_bitap: Beyond end match.", 3, + dmp.match_bitap("abcdef", "defyy", 4)); + + assertEquals("match_bitap: Oversized pattern.", 0, + dmp.match_bitap("abcdef", "xabcdefy", 0)); + + dmp.Match_Threshold = 0.4f; + assertEquals("match_bitap: Threshold #1.", 4, + dmp.match_bitap("abcdefghijk", "efxyhi", 1)); + + dmp.Match_Threshold = 0.3f; + assertEquals("match_bitap: Threshold #2.", -1, + dmp.match_bitap("abcdefghijk", "efxyhi", 1)); + + dmp.Match_Threshold = 0.0f; + assertEquals("match_bitap: Threshold #3.", 1, + dmp.match_bitap("abcdefghijk", "bcdef", 1)); + + dmp.Match_Threshold = 0.5f; + assertEquals("match_bitap: Multiple select #1.", 0, + dmp.match_bitap("abcdexyzabcde", "abccde", 3)); + + assertEquals("match_bitap: Multiple select #2.", 8, + dmp.match_bitap("abcdexyzabcde", "abccde", 5)); + + 
dmp.Match_Distance = 10; // Strict location. + assertEquals("match_bitap: Distance test #1.", -1, + dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24)); + + assertEquals("match_bitap: Distance test #2.", 0, + dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdxxefg", 1)); + + dmp.Match_Distance = 1000; // Loose location. + assertEquals("match_bitap: Distance test #3.", 0, + dmp.match_bitap("abcdefghijklmnopqrstuvwxyz", "abcdefg", 24)); +} + +TEST_F(diff_match_patch_test, testMatchMain) { + // Full match. + assertEquals("match_main: Equality.", 0, + dmp.match_main("abcdef", "abcdef", 1000)); + + assertEquals("match_main: nullptr text.", -1, + dmp.match_main("", "abcdef", 1)); + + assertEquals("match_main: nullptr pattern.", 3, + dmp.match_main("abcdef", "", 3)); + + assertEquals("match_main: Exact match.", 3, + dmp.match_main("abcdef", "de", 3)); + + dmp.Match_Threshold = 0.7f; + assertEquals("match_main: Complex match.", 4, + dmp.match_main("I am the very model of a modern major general.", + " that berry ", 5)); + dmp.Match_Threshold = 0.5f; +} + +// PATCH TEST FUNCTIONS + +TEST_F(diff_match_patch_test, testPatchObj) { + // Patch Object. 
+ Patch p; + p.start1 = 20; + p.start2 = 21; + p.length1 = 18; + p.length2 = 17; + p.diffs = {Diff(EQUAL, "jump"), Diff(DELETE, "s"), Diff(INSERT, "ed"), + Diff(EQUAL, " over "), Diff(DELETE, "the"), Diff(INSERT, "a"), + Diff(EQUAL, "\nlaz")}; + std::wstring strp = + L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals("patch: toString.", strp, p.toString()); +} + +TEST_F(diff_match_patch_test, testPatchFromText) { + assertTrue("patch_fromText: #0.", dmp.patch_fromText("").empty()); + + std::wstring strp = + L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n %0Alaz\n"; + assertEquals("patch_fromText: #1.", strp, + dmp.patch_fromText(strp)[0].toString()); + + assertEquals("patch_fromText: #2.", L"@@ -1 +1 @@\n-a\n+b\n", + dmp.patch_fromText("@@ -1 +1 @@\n-a\n+b\n")[0].toString()); + + assertEquals("patch_fromText: #3.", L"@@ -1,3 +0,0 @@\n-abc\n", + dmp.patch_fromText("@@ -1,3 +0,0 @@\n-abc\n")[0].toString()); + + assertEquals("patch_fromText: #4.", L"@@ -0,0 +1,3 @@\n+abc\n", + dmp.patch_fromText("@@ -0,0 +1,3 @@\n+abc\n")[0].toString()); + + // Generates error. 
+ assertThrow("patch_fromText: #5.", dmp.patch_fromText("Bad\nPatch\n"), + std::wstring); +} + +TEST_F(diff_match_patch_test, testPatchToText) { + std::wstring strp = + L"@@ -21,18 +22,17 @@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + auto patches = dmp.patch_fromText(strp); + assertEquals("patch_toText: Single", strp, dmp.patch_toText(patches)); + + strp = + L"@@ -1,9 +1,9 @@\n-f\n+F\n oo+fooba\n@@ -7,9 +7,9 @@\n obar\n-,\n+.\n " + L"tes\n"; + patches = dmp.patch_fromText(strp); + assertEquals("patch_toText: Dua", strp, dmp.patch_toText(patches)); +} + +TEST_F(diff_match_patch_test, testPatchAddContext) { + dmp.Patch_Margin = 4; + auto p = dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n")[0]; + dmp.patch_addContext(p, "The quick brown fox jumps over the lazy dog."); + assertEquals("patch_addContext: Simple case.", + L"@@ -17,12 +17,18 @@\n fox \n-jump\n+somersault\n s ov\n", + p.toString()); + + p = dmp.patch_fromText("@@ -21,4 +21,10 @@\n-jump\n+somersault\n")[0]; + dmp.patch_addContext(p, "The quick brown fox jumps."); + assertEquals("patch_addContext: Not enough trailing context.", + L"@@ -17,10 +17,16 @@\n fox \n-jump\n+somersault\n s.\n", + p.toString()); + + p = dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n")[0]; + dmp.patch_addContext(p, "The quick brown fox jumps."); + assertEquals("patch_addContext: Not enough leading context.", + L"@@ -1,7 +1,8 @@\n Th\n-e\n+at\n qui\n", p.toString()); + + p = dmp.patch_fromText("@@ -3 +3,2 @@\n-e\n+at\n")[0]; + dmp.patch_addContext( + p, "The quick brown fox jumps. The quick brown fox crashes."); + assertEquals("patch_addContext: Ambiguity.", + L"@@ -1,27 +1,28 @@\n Th\n-e\n+at\n quick brown fox jumps. 
\n", + p.toString()); +} + +TEST_F(diff_match_patch_test, testPatchMake) { + TPatchVector patches; + patches = dmp.patch_make("", ""); + assertEquals("patch_make: nullptr case", L"", dmp.patch_toText(patches)); + + std::wstring text1 = L"The quick brown fox jumps over the lazy dog."; + std::wstring text2 = L"That quick brown fox jumped over a lazy dog."; + std::wstring expectedPatch = + L"@@ -1,8 +1,7 @@\n Th\n-at\n+e\n qui\n@@ -21,17 +21,18 @@\n " + L"jump\n-ed\n+s\n over \n-a\n+the\n laz\n"; + // The second patch must be "-21,17 +21,18", not "-22,17 +21,18" due to + // rolling context. + patches = dmp.patch_make(text2, text1); + assertEquals("patch_make: Text2+Text1 inputs", expectedPatch, + dmp.patch_toText(patches)); + + expectedPatch = + L"@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 @@\n " + L"jump\n-s\n+ed\n over \n-the\n+a\n laz\n"; + patches = dmp.patch_make(text1, text2); + assertEquals("patch_make: Text1+Text2 inputs", expectedPatch, + dmp.patch_toText(patches)); + + auto diffs = dmp.diff_main(text1, text2, false); + patches = dmp.patch_make(diffs); + assertEquals("patch_make: Diff input", expectedPatch, + dmp.patch_toText(patches)); + + patches = dmp.patch_make(text1, diffs); + assertEquals("patch_make: Text1+Diff inputs", expectedPatch, + dmp.patch_toText(patches)); + + patches = dmp.patch_make(text1, text2, diffs); + assertEquals("patch_make: Text1+Text2+Diff inputs (deprecated)", + expectedPatch, dmp.patch_toText(patches)); + + patches = dmp.patch_make("`1234567890-=[]\\;',./", "~!@#$%^&*()_+{}|:\"<>?"); + assertEquals("patch_toText: Character encoding.", + L"@@ -1,21 +1,21 " + L"@@\n-%601234567890-=%5B%5D%5C;',./" + L"\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n", + dmp.patch_toText(patches)); + + diffs = {Diff(DELETE, "`1234567890-=[]\\;',./"), + Diff(INSERT, "~!@#$%^&*()_+{}|:\"<>?")}; + assertEquals( + "patch_fromText: Character decoding.", diffs, + dmp.patch_fromText("@@ -1,21 +1,21 " + "@@\n-%601234567890-=%5B%5D%5C;',./" + 
"\n+~!@#$%25%5E&*()_+%7B%7D%7C:%22%3C%3E?\n")[0] + .diffs); + + text1 = {}; + for (int x = 0; x < 100; x++) { + text1 += L"abcdef"; + } + text2 = text1 + L"123"; + expectedPatch = + L"@@ -573,28 +573,31 @@\n cdefabcdefabcdefabcdefabcdef\n+123\n"; + patches = dmp.patch_make(text1, text2); + assertEquals("patch_make: Long string with repeats.", expectedPatch, + dmp.patch_toText(patches)); +} + +TEST_F(diff_match_patch_test, testPatchSplitMax) { + // Confirm Match_MaxBits is 32. + TPatchVector patches; + patches = dmp.patch_make( + "abcdefghijklmnopqrstuvwxyz01234567890", + "XabXcdXefXghXijXklXmnXopXqrXstXuvXwxXyzX01X23X45X67X89X0"); + dmp.patch_splitMax(patches); + assertEquals("patch_splitMax: #1.", + L"@@ -1,32 +1,46 @@\n+X\n ab\n+X\n cd\n+X\n ef\n+X\n gh\n+X\n " + L"ij\n+X\n kl\n+X\n mn\n+X\n op\n+X\n qr\n+X\n st\n+X\n " + L"uv\n+X\n wx\n+X\n yz\n+X\n 012345\n@@ -25,13 +39,18 @@\n " + L"zX01\n+X\n 23\n+X\n 45\n+X\n 67\n+X\n 89\n+X\n 0\n", + dmp.patch_toText(patches)); + + patches = dmp.patch_make( + "abcdef123456789012345678901234567890123456789012345678901234567890123456" + "7890uvwxyz", + "abcdefuvwxyz"); + std::wstring oldToText = dmp.patch_toText(patches); + dmp.patch_splitMax(patches); + assertEquals("patch_splitMax: #2.", oldToText, dmp.patch_toText(patches)); + + patches = dmp.patch_make( + "1234567890123456789012345678901234567890123456789012345678901234567890", + "abc"); + dmp.patch_splitMax(patches); + assertEquals("patch_splitMax: #3.", + L"@@ -1,32 +1,4 @@\n-1234567890123456789012345678\n 9012\n@@ " + L"-29,32 +1,4 @@\n-9012345678901234567890123456\n 7890\n@@ " + L"-57,14 +1,3 @@\n-78901234567890\n+abc\n", + dmp.patch_toText(patches)); + + patches = dmp.patch_make( + "abcdefghij , h : 0 , t : 1 abcdefghij , h : 0 , t : 1 abcdefghij , h : " + "0 , t : 1", + "abcdefghij , h : 1 , t : 1 abcdefghij , h : 1 , t : 1 abcdefghij , h : " + "0 , t : 1"); + dmp.patch_splitMax(patches); + assertEquals( + "patch_splitMax: #4.", + L"@@ -2,32 +2,32 @@\n bcdefghij , h 
: \n-0\n+1\n , t : 1 abcdef\n@@ " + L"-29,32 +29,32 @@\n bcdefghij , h : \n-0\n+1\n , t : 1 abcdef\n", + dmp.patch_toText(patches)); +} + +TEST_F(diff_match_patch_test, testPatchAddPadding) { + TPatchVector patches; + patches = dmp.patch_make("", "test"); + assertEquals("patch_addPadding: Both edges ful", L"@@ -0,0 +1,4 @@\n+test\n", + dmp.patch_toText(patches)); + dmp.patch_addPadding(patches); + assertEquals("patch_addPadding: Both edges full.", + L"@@ -1,8 +1,12 @@\n %01%02%03%04\n+test\n %01%02%03%04\n", + dmp.patch_toText(patches)); + + patches = dmp.patch_make("XY", "XtestY"); + assertEquals("patch_addPadding: Both edges partial.", + L"@@ -1,2 +1,6 @@\n X\n+test\n Y\n", dmp.patch_toText(patches)); + dmp.patch_addPadding(patches); + assertEquals("patch_addPadding: Both edges partial.", + L"@@ -2,8 +2,12 @@\n %02%03%04X\n+test\n Y%01%02%03\n", + dmp.patch_toText(patches)); + + patches = dmp.patch_make("XXXXYYYY", "XXXXtestYYYY"); + assertEquals("patch_addPadding: Both edges none.", + L"@@ -1,8 +1,12 @@\n XXXX\n+test\n YYYY\n", + dmp.patch_toText(patches)); + dmp.patch_addPadding(patches); + assertEquals("patch_addPadding: Both edges none.", + L"@@ -5,8 +5,12 @@\n XXXX\n+test\n YYYY\n", + dmp.patch_toText(patches)); +} + +TEST_F(diff_match_patch_test, testPatchApply) { + dmp.Match_Distance = 1000; + dmp.Match_Threshold = 0.5f; + dmp.Patch_DeleteThreshold = 0.5f; + TPatchVector patches; + patches = dmp.patch_make("", ""); + auto results = dmp.patch_apply(patches, "Hello world."); + auto &&boolArray = results.second; + + std::wstring resultStr = + results.first + L"\t" + std::to_wstring(boolArray.size()); + assertEquals("patch_apply: nullptr case.", L"Hello world.\t0", resultStr); + + patches = dmp.patch_make("The quick brown fox jumps over the lazy dog.", + "That quick brown fox jumped over a lazy dog."); + assertEquals("patch_apply: Exact match.", + L"@@ -1,11 +1,12 @@\n Th\n-e\n+at\n quick b\n@@ -22,18 +22,17 " + L"@@\n jump\n-s\n+ed\n over \n-the\n+a\n laz\n", 
+ dmp.patch_toText(patches)); + + results = + dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog."); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring(boolArray); + + assertEquals("patch_apply: Exact match.", + L"That quick brown fox jumped over a lazy dog.\ttrue\ttrue", + resultStr); + + results = dmp.patch_apply(patches, + "The quick red rabbit jumps over the tired tiger."); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring(boolArray); + assertEquals("patch_apply: Partial match.", + L"That quick red rabbit jumped over a tired tiger.\ttrue\ttrue", + resultStr); + + results = dmp.patch_apply(patches, + "I am the very model of a modern major general."); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring(boolArray); + assertEquals("patch_apply: Failed match.", + L"I am the very model of a modern major general.\tfalse\tfalse", + resultStr); + + patches = dmp.patch_make( + "x1234567890123456789012345678901234567890123456789012345678901234567890" + "y", + "xabcy"); + results = dmp.patch_apply(patches, + "x123456789012345678901234567890-----++++++++++----" + "-123456789012345678901234567890y"); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring(boolArray); + assertEquals("patch_apply: Big delete, small change.", L"xabcy\ttrue\ttrue", + resultStr); + + patches = dmp.patch_make( + "x1234567890123456789012345678901234567890123456789012345678901234567890" + "y", + "xabcy"); + results = dmp.patch_apply(patches, + "x12345678901234567890---------------++++++++++----" + "-----------12345678901234567890y"); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring(boolArray); + assertEquals("patch_apply: Big delete, large change 1.", + L"xabc12345678901234567890---------------++++++++++-------------" + L"--12345678901234567890y\tfalse\ttrue", + resultStr); + + dmp.Patch_DeleteThreshold = 0.6f; + patches = dmp.patch_make( + 
"x1234567890123456789012345678901234567890123456789012345678901234567890" + "y", + "xabcy"); + results = dmp.patch_apply(patches, + "x12345678901234567890---------------++++++++++----" + "-----------12345678901234567890y"); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring(boolArray); + assertEquals("patch_apply: Big delete, large change 2.", L"xabcy\ttrue\ttrue", + resultStr); + dmp.Patch_DeleteThreshold = 0.5f; + + dmp.Match_Threshold = 0.0f; + dmp.Match_Distance = 0; + patches = + dmp.patch_make("abcdefghijklmnopqrstuvwxyz--------------------1234567890", + "abcXXXXXXXXXXdefghijklmnopqrstuvwxyz--------------------" + "1234567YYYYYYYYYY890"); + results = dmp.patch_apply( + patches, "ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------1234567890"); + boolArray = results.second; + resultStr = results.first + NUtils::to_wstring(boolArray); + assertEquals("patch_apply: Compensate for failed patch.", + L"ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------------" + L"1234567YYYYYYYYYY890\tfalse\ttrue", + resultStr); + dmp.Match_Threshold = 0.5f; + dmp.Match_Distance = 1000; + + patches = dmp.patch_make("", "test"); + std::wstring patchStr = dmp.patch_toText(patches); + dmp.patch_apply(patches, ""); + assertEquals("patch_apply: No side effects.", patchStr, + dmp.patch_toText(patches)); + + patches = + dmp.patch_make("The quick brown fox jumps over the lazy dog.", "Woof"); + patchStr = dmp.patch_toText(patches); + dmp.patch_apply(patches, "The quick brown fox jumps over the lazy dog."); + assertEquals("patch_apply: No side effects with major delete.", patchStr, + dmp.patch_toText(patches)); + + patches = dmp.patch_make("", "test"); + results = dmp.patch_apply(patches, ""); + boolArray = results.second; + resultStr = results.first + L"\t" + NUtils::to_wstring(boolArray[0], false); + assertEquals("patch_apply: Edge exact match.", L"test\ttrue", resultStr); + + patches = dmp.patch_make("XY", "XtestY"); + results = dmp.patch_apply(patches, "XY"); + boolArray = 
results.second; + resultStr = results.first + L"\t" + NUtils::to_wstring(boolArray[0], false); + assertEquals("patch_apply: Near edge exact match.", L"XtestY\ttrue", + resultStr); + + patches = dmp.patch_make("y", "y123"); + results = dmp.patch_apply(patches, "x"); + boolArray = results.second; + resultStr = results.first + L"\t" + NUtils::to_wstring(boolArray[0]); + assertEquals("patch_apply: Edge partial match.", L"x123\ttrue", resultStr); +} + +TEST_F(diff_match_patch_test, fromGitHubExamples) { + auto lhs = + L"I am the very model of a modern Major-General, I've information " + L"vegetable, animal, and mineral, I know the kings of England, and I " + L"quote the fights historical, From Marathon to Waterloo, in order " + L"categorical."; + auto rhs = + L"I am the very model of a cartoon individual, My animation's comical, " + L"unusual, and whimsical, I'm quite adept at funny gags, comedic theory " + L"I have read, From wicked puns and stupid jokes to anvils that drop on " + L"your head."; + auto diffs = dmp.diff_main(lhs, rhs); + dmp.diff_cleanupSemantic(diffs); + auto console = dmp.diff_prettyConsole(diffs); + auto html = dmp.diff_prettyHtml(diffs); + auto delta = dmp.diff_toDelta(diffs); + + auto consoleGolden = + L"I am the very model of a \x1B[0;31mmodern Major-General, I've " + L"information vegetable, animal, and mineral, I know the kings of " + L"England, and I quote the fights historical, From Marathon to Waterloo, " + L"in order categorical\x1B[m\x1B[0;32mcartoon individual, My animation's " + L"comical, unusual, and whimsical, I'm quite adept at funny gags, " + L"comedic theory I have read, From wicked puns and stupid jokes to " + L"anvils that drop on your head\x1B[m."; + assertEquals("gitHubDemos", consoleGolden, console); + + auto htmlGolden = + LR"(I am the very model of a modern Major-General, I've information vegetable, animal, and mineral, I know the kings of England, and I quote the fights historical, From Marathon to Waterloo, in order 
categoricalcartoon individual, My animation's comical, unusual, and whimsical, I'm quite adept at funny gags, comedic theory I have read, From wicked puns and stupid jokes to anvils that drop on your head.)"; + assertEquals("gitHubDemos", htmlGolden, html); + + auto deltaGolden = + L"=25\t-182\t+cartoon individual, My animation's comical, unusual, and " + L"whimsical, I'm quite adept at funny gags, comedic theory I have read, " + L"From wicked puns and stupid jokes to anvils that drop on your head\t=1"; + assertEquals("gitHubDemos", deltaGolden, delta); + + auto patches = dmp.patch_make(lhs, rhs); + auto patch = dmp.patch_toText(patches); + auto patchGolden = + L"@@ -22,187 +22,198 @@\n f a \n-modern Major-General, I've information " + L"vegetable, animal, and mineral, I know the kings of England, and I " + L"quote the fights historical, From Marathon to Waterloo, in order " + L"categorical\n+cartoon individual, My animation's comical, unusual, and " + L"whimsical, I'm quite adept at funny gags, comedic theory I have read, " + L"From wicked puns and stupid jokes to anvils that drop on your head\n " + L".\n"; + assertEquals("gitHubDemos", patchGolden, patch); +} diff --git a/cpp17/diff_match_patch_test.h b/cpp17/diff_match_patch_test.h new file mode 100644 index 0000000..07678d2 --- /dev/null +++ b/cpp17/diff_match_patch_test.h @@ -0,0 +1,169 @@ +/* + * Diff Match and Patch -- Test Harness + * Copyright 2018 The diff-match-patch Authors. + * https://github.com/google/diff-match-patch + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef DIFF_MATCH_PATCH_TEST_H
+#define DIFF_MATCH_PATCH_TEST_H
+
+#ifdef USE_GTEST
+// GoogleTest build: the assert* helpers expand directly to EXPECT_* macros and
+// PUBLIC_TESTING makes the fixture derive from testing::Test.
+#include "gtest/gtest.h"
+#define assertEquals(msg, GOLDEN, COMPUTED) EXPECT_EQ(GOLDEN, COMPUTED) << msg
+#define assertEmpty(msg, COMPUTED) EXPECT_TRUE(COMPUTED.empty()) << msg
+#define assertTrue(msg, COMPUTED) EXPECT_TRUE(COMPUTED) << msg
+#define assertFalse(msg, COMPUTED) EXPECT_FALSE(COMPUTED) << msg
+#define PUBLIC_TESTING : public testing::Test
+#define assertThrow(msg, STATEMENT, EXCEPTION_TYPE) \
+  EXPECT_THROW(STATEMENT, EXCEPTION_TYPE) << msg
+#else
+// Stand-alone build: TEST_F turns each test into a plain member function of
+// diff_match_patch_test and the assert* helpers are the members declared below.
+// NOTE(review): the header name on this include was lost in extraction;
+// <functional> is reconstructed from the std::function use below - confirm.
+#include <functional>
+#define PUBLIC_TESTING
+#define TEST_F(className, funcName) void diff_match_patch_test::funcName()
+#endif
+
+#include "diff_match_patch_utils.h"
+
+/**
+ * Test fixture for diff_match_patch. Owns the diff_match_patch instance (dmp)
+ * that the tests exercise and, when USE_GTEST is not defined, also provides the
+ * assertion helpers and pass/fail counters.
+ */
+class diff_match_patch_test PUBLIC_TESTING {
+ public:
+  using TStringVector = diff_match_patch::TStringVector;
+  using TCharPosMap = diff_match_patch::TCharPosMap;
+  using TVariant = diff_match_patch::TVariant;
+  using TVariantVector = diff_match_patch::TVariantVector;
+
+  diff_match_patch_test();
+
+#ifndef USE_GTEST
+ public:
+  int run_all_tests();
+
+  // DIFF TEST FUNCTIONS
+  void testDiffCommonPrefix();
+  void testDiffCommonSuffix();
+  void testDiffCommonOverlap();
+  void testDiffHalfmatch();
+  void testDiffLinesToChars();
+  void testDiffCharsToLines();
+  void testDiffCleanupMerge();
+  void testDiffCleanupSemanticLossless();
+  void testDiffCleanupSemantic();
+  void testDiffCleanupEfficiency();
+  void testDiffPrettyHtml();
+  void testDiffPrettyConsole();
+  void testDiffText();
+  void testDiffDelta();
+  void testDiffXIndex();
+  void testDiffLevenshtein();
+  void testDiffBisect();
+  void testDiffMain();
+
+  // MATCH TEST FUNCTIONS
+  void testMatchAlphabet();
+  void testMatchBitap();
+  void testMatchMain();
+
+  // PATCH TEST FUNCTIONS
+  void testPatchObj();
+  void testPatchFromText();
+  void testPatchToText();
+  void testPatchAddContext();
+  void testPatchMake();
+  void testPatchSplitMax();
+  void testPatchAddPadding();
+  void testPatchApply();
+
+  void fromGitHubExamples();
+
+ private:
+  // NOTE(review): the template argument of std::function was lost in
+  // extraction; void() matches the TEST_F signature above - confirm against the
+  // definition of runTest.
+  bool runTest(std::function<void()> test);
+  std::size_t numPassedTests{0};
+  std::size_t numFailedTests{0};
+
+  /**
+   * Element-wise equality assertion for indexable containers (anything with
+   * size() and operator[]). Reports a failure when the sizes differ or any
+   * pair of elements compares unequal, otherwise reports a pass.
+   * @param strCase Label printed with the result.
+   * @param lhs Expected container.
+   * @param rhs Actual container.
+   */
+  template <typename T>
+  void assertEquals(const std::string &strCase, const T &lhs, const T &rhs) {
+    bool failed = (lhs.size() != rhs.size());
+    if (!failed) {
+      for (auto ii = 0ULL; !failed && (ii < lhs.size()); ++ii) {
+        auto &&t1 = lhs[ii];
+        auto &&t2 = rhs[ii];
+        failed = t1 != t2;
+      }
+    }
+
+    if (failed) {
+      // Build human readable description of both lists.
+      auto lhsString = NUtils::to_wstring(lhs, true);
+      auto rhsString = NUtils::to_wstring(rhs, true);
+      reportFailure(strCase, lhsString, rhsString);
+      return;
+    }
+    reportPassed(strCase);
+  }
+
+  // Scalar / single-object overloads; the first value is the expected one.
+  void assertEquals(const std::string &strCase, bool lhs, bool rhs);
+  void assertEquals(const std::string &strCase, std::size_t n1, std::size_t n2);
+  void assertEquals(const std::string &strCase, const std::wstring &s1,
+                    const std::wstring &s2);
+  void assertEquals(const std::string &strCase, const std::string &s1,
+                    const std::string &s2);
+  void assertEquals(const std::string &strCase, const Diff &d1, const Diff &d2);
+  void assertEquals(const std::string &strCase, const TVariant &var1,
+                    const TVariant &var2);
+  void assertEquals(const std::string &strCase, const TCharPosMap &m1,
+                    const TCharPosMap &m2);
+
+  void assertTrue(const std::string &strCase, bool value);
+  void assertFalse(const std::string &strCase, bool value);
+  void assertEmpty(const std::string &strCase, const TStringVector &list);
+
+  void reportFailure(const std::string &strCase, const std::wstring &expected,
+                     const std::wstring &actual);
+  void reportPassed(const std::string &strCase);
+
+// Runs COMMAND and asserts that it throws EXCEPTION_TYPE. Exactly one result is
+// reported per use: a pass when the exception was caught, a failure when
+// COMMAND returned normally. Exceptions of any other type propagate.
+#define assertThrow(msg, COMMAND, EXCEPTION_TYPE)                        \
+  {                                                                      \
+    bool exceptionTriggered = false;                                     \
+    try {                                                                \
+      COMMAND;                                                           \
+    } catch (const EXCEPTION_TYPE &) {                                   \
+      exceptionTriggered = true;                                         \
+    }                                                                    \
+    assertTrue(std::string(msg) + std::string(" - Exception triggered"), \
+               exceptionTriggered);                                      \
+  }
+
+#endif
+ public:
+  bool equals(const TVariant &var1, const TVariant &var2);
+
+  /**
+   * Element-wise comparison of two indexable containers.
+   * @return true when both have the same size and every pair of elements at
+   *     the same index compares equal.
+   */
+  template <typename T>
+  bool equals(const T &lhs, const T &rhs) {
+    bool equal = (lhs.size() == rhs.size());
+    for (auto ii = 0ULL; equal && (ii < lhs.size()); ++ii) {
+      auto &&t1 = lhs[ii];
+      auto &&t2 = rhs[ii];
+      equal = t1 == t2;
+    }
+    return equal;
+  }
+  diff_match_patch dmp;
+
+  // Construct the two texts which made up the diff originally.
+  TStringVector diff_rebuildtexts(const TDiffVector &diffs);
+};
+
+#endif  // DIFF_MATCH_PATCH_TEST_H
diff --git a/cpp17/diff_match_patch_test_assertEquals.cpp b/cpp17/diff_match_patch_test_assertEquals.cpp
new file mode 100644
index 0000000..4e01f68
--- /dev/null
+++ b/cpp17/diff_match_patch_test_assertEquals.cpp
@@ -0,0 +1,152 @@
+/*
+ * Diff Match and Patch -- Test Harness
+ * Copyright 2018 The diff-match-patch Authors.
+ * https://github.com/google/diff-match-patch
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// NOTE(review): the header name on this include was lost in extraction;
+// <iostream> is reconstructed from the std::cout / std::wcerr use below.
+#include <iostream>
+
+#include "diff_match_patch.h"
+#include "diff_match_patch_test.h"
+#include "diff_match_patch_utils.h"
+#ifndef USE_GTEST
+/**
+ * Prints a FAILED line for the case plus the expected and actual values, and
+ * increments numFailedTests. Does not throw, so the calling test continues.
+ */
+void diff_match_patch_test::reportFailure(const std::string &strCase,
+                                          const std::wstring &expected,
+                                          const std::wstring &actual) {
+  std::cout << "FAILED : " + strCase + "\n";
+  std::wcerr << " Expected: " << expected << "\n Actual: " << actual
+             << "\n";
+  numFailedTests++;
+  // throw strCase;
+}
+
+/** Prints a PASSED line for the case. */
+void diff_match_patch_test::reportPassed(const std::string &strCase) {
+  std::cout << "PASSED: " + strCase + "\n";
+}
+
+// Every assertion below reports exactly one outcome: it returns right after
+// reportFailure so a failed case is not also printed as PASSED.
+
+void diff_match_patch_test::assertEquals(const std::string &strCase,
+                                         std::size_t n1, std::size_t n2) {
+  if (n1 != n2) {
+    reportFailure(strCase, std::to_wstring(n1), std::to_wstring(n2));
+    return;
+  }
+  reportPassed(strCase);
+}
+
+void diff_match_patch_test::assertEquals(const std::string &strCase,
+                                         const std::wstring &s1,
+                                         const std::wstring &s2) {
+  if (s1 != s2) {
+    reportFailure(strCase, s1, s2);
+    return;
+  }
+  reportPassed(strCase);
+}
+
+// Narrow strings are widened and compared through the std::wstring overload.
+void diff_match_patch_test::assertEquals(const std::string &strCase,
+                                         const std::string &s1,
+                                         const std::string &s2) {
+  return assertEquals(strCase, NUtils::to_wstring(s1), NUtils::to_wstring(s2));
+}
+
+void diff_match_patch_test::assertEquals(const std::string &strCase,
+                                         const Diff &d1, const Diff &d2) {
+  if (d1 != d2) {
+    reportFailure(strCase, d1.toString(), d2.toString());
+    return;
+  }
+  reportPassed(strCase);
+}
+
+void diff_match_patch_test::assertEquals(const std::string &strCase,
+                                         const TVariant &var1,
+                                         const TVariant &var2) {
+  if (var1 != var2) {
+    reportFailure(strCase, NUtils::to_wstring(var1), NUtils::to_wstring(var2));
+    return;
+  }
+  reportPassed(strCase);
+}
+
+/**
+ * Asserts that two character maps hold the same keys with the same values.
+ * The first difference found is reported as "(key,value)"; an entry that is
+ * absent on one side is shown as an empty string on that side.
+ * @param m1 Expected map.
+ * @param m2 Actual map.
+ */
+void diff_match_patch_test::assertEquals(const std::string &strCase,
+                                         const TCharPosMap &m1,
+                                         const TCharPosMap &m2) {
+  const auto describe = [](const auto &entry) {
+    return L"(" + NUtils::to_wstring(entry.first) + L"," +
+           std::to_wstring(entry.second) + L")";
+  };
+
+  for (auto &&expected : m1) {
+    const auto actual = m2.find(expected.first);
+    if (actual == m2.end()) {
+      reportFailure(strCase, describe(expected), L"");
+      return;
+    }
+    if (actual->second != expected.second) {
+      reportFailure(strCase, describe(expected), describe(*actual));
+      return;
+    }
+  }
+
+  // Keys present only in the actual map.
+  for (auto &&actual : m2) {
+    if (m1.find(actual.first) == m1.end()) {
+      reportFailure(strCase, L"", describe(actual));
+      return;
+    }
+  }
+
+  reportPassed(strCase);
+}
+
+void diff_match_patch_test::assertEquals(const std::string &strCase, bool lhs,
+                                         bool rhs) {
+  if (lhs != rhs) {
+    reportFailure(strCase, NUtils::to_wstring(lhs, false),
+                  NUtils::to_wstring(rhs, false));
+    return;
+  }
+  reportPassed(strCase);
+}
+
+void diff_match_patch_test::assertTrue(const std::string &strCase, bool value) {
+  if (!value) {
+    reportFailure(strCase, NUtils::to_wstring(true, false),
+                  NUtils::to_wstring(false, false));
+    return;
+  }
+  reportPassed(strCase);
+}
+
+void diff_match_patch_test::assertFalse(const std::string &strCase,
+                                        bool value) {
+  if (value) {
+    reportFailure(strCase, NUtils::to_wstring(false, false),
+                  NUtils::to_wstring(true, false));
+    return;
+  }
+  reportPassed(strCase);
+}
+
+// NOTE(review): unlike the other assertions this one throws the case name
+// instead of calling reportFailure, and prints nothing on success. Left as is
+// because the handling in runTest is not visible here - confirm and align.
+void diff_match_patch_test::assertEmpty(const std::string &strCase,
+                                        const TStringVector &list) {
+  if (!list.empty()) {
+    throw strCase;
+  }
+}
+#endif
+
+/**
+ * Construct the two texts which made up the diff originally.
+ * @param diffs Sequence of Diff entries.
+ * @return Two strings: [0] is every non-INSERT text concatenated (the source),
+ *     [1] is every non-DELETE text concatenated (the destination).
+ */
+diff_match_patch_test::TStringVector diff_match_patch_test::diff_rebuildtexts(
+    const TDiffVector &diffs) {
+  TStringVector text(2, std::wstring());
+  for (auto &&myDiff : diffs) {
+    if (myDiff.operation != INSERT) {
+      text[0] += myDiff.text;
+    }
+    if (myDiff.operation != DELETE) {
+      text[1] += myDiff.text;
+    }
+  }
+  return text;
+}
diff --git a/cpp17/diff_match_patch_utils.cpp b/cpp17/diff_match_patch_utils.cpp
new file mode 100644
index 0000000..dfd8951
--- /dev/null
+++ b/cpp17/diff_match_patch_utils.cpp
@@ -0,0 +1,168 @@
+/*
+ * Diff Match and Patch
+ * Copyright 2018 The diff-match-patch Authors.
+ * https://github.com/google/diff-match-patch
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "diff_match_patch_utils.h"
+
+// NOTE(review): the two header names here were lost in extraction; <codecvt>
+// and <locale> are reconstructed from the std::wstring_convert /
+// std::codecvt_utf8 use below - confirm.
+#include <codecvt>
+#include <locale>
+
+namespace NUtils {
+// Substring from pos to the end of str; see the three-argument overload.
+std::wstring safeMid(const std::wstring &str, std::size_t pos) {
+  return safeMid(str, pos, std::wstring::npos);
+}
+
+// Returns an empty string when pos is at or past the end of str, so an
+// out-of-range start never reaches substr (which would throw
+// std::out_of_range for pos > length).
+std::wstring safeMid(const std::wstring &str, std::size_t pos,
+                     std::size_t len) {
+  return (pos >= str.length()) ? std::wstring() : str.substr(pos, len);
+}
+
+// Replaces every occurrence of `from` in inString with `to`, scanning left to
+// right and resuming after each inserted replacement so that text introduced
+// by `to` is never matched again.
+void replace(std::wstring &inString, const std::wstring &from,
+             const std::wstring &to) {
+  // An empty needle matches at every position and the loop below would never
+  // terminate; treat it as "nothing to replace".
+  if (from.empty()) return;
+
+  std::size_t pos = inString.find(from);
+  while (pos != std::wstring::npos) {
+    inString.replace(pos, from.length(), to);
+    pos = inString.find(from, pos + to.length());
+  }
+}
+
+// Upper-case hexadecimal digit for the low four bits of value.
+wchar_t toHexUpper(wchar_t value) { return L"0123456789ABCDEF"[value & 0xF]; }
+
+// Percent-encodes a single code unit. The character is emitted unchanged when
+// it is an unreserved character (ALPHA / DIGIT / '-' / '.' / '_' / '~') or is
+// listed in `exclude`, provided it is not listed in `include`; otherwise it is
+// emitted as '%' followed by two hex digits of its low byte.
+std::wstring toPercentEncoding(wchar_t c, const std::wstring &exclude,
+                               const std::wstring &include) {
+  std::wstring retVal;
+
+  if (((c >= 0x61 && c <= 0x7A)     // ALPHA
+       || (c >= 0x41 && c <= 0x5A)  // ALPHA
+       || (c >= 0x30 && c <= 0x39)  // DIGIT
+       || c == 0x2D                 // -
+       || c == 0x2E                 // .
+       || c == 0x5F                 // _
+       || c == 0x7E                 // ~
+       || (exclude.find(c) != std::wstring::npos)) &&
+      (include.find(c) == std::wstring::npos)) {
+    retVal = std::wstring(1, c);
+  } else {
+    retVal = L'%';
+    retVal += toHexUpper((c & 0xf0) >> 4);
+    retVal += toHexUpper(c & 0xf);
+  }
+  return retVal;
+}
+
+// Percent-encodes a whole string: the input is converted to UTF-8 and every
+// non-zero byte is passed through the single-character overload above.
+std::wstring toPercentEncoding(
+    const std::wstring &input, const std::wstring &exclude /*= std::wstring()*/,
+    const std::wstring &include /*= std::wstring() */) {
+  if (input.empty()) return {};
+
+  static_assert(sizeof(wchar_t) <= 4, "wchar_t is greater than 32 bit");
+
+  // One conversion for the whole input instead of a temporary string and a
+  // conversion per character; the resulting byte sequence is the same.
+  std::wstring_convert<std::codecvt_utf8<wchar_t> > utf8_conv;
+  const auto asBytes = utf8_conv.to_bytes(input);
+
+  std::wstring retVal;
+  retVal.reserve(asBytes.length() * 3);
+  for (auto &&ii : asBytes) {
+    if (ii) retVal += toPercentEncoding(ii, exclude, include);
+  }
+  return retVal;
+}
+
+// Numeric value (0-15) of a hexadecimal digit. Defined under the name the
+// header declares, getIntValue.
+// Throws std::wstring("Invalid Character %" + ch) for any other character.
+wchar_t getIntValue(wchar_t ch) {
+  if (ch >= '0' && ch <= '9')
+    ch -= '0';
+  else if (ch >= 'a' && ch <= 'f')
+    ch = ch - 'a' + 10;
+  else if (ch >= 'A' && ch <= 'F')
+    ch = ch - 'A' + 10;
+  else
+    throw std::wstring(L"Invalid Character %") + ch;
+
+  return ch;
+}
+
+// Decodes percent-encoded text. "%XY" becomes the byte 0xXY, '+' becomes a
+// space, and everything else is copied; the collected bytes are then decoded
+// as UTF-8. A '%' with fewer than two characters after it is copied as is.
+// Throws std::wstring when the two characters after '%' are not hex digits.
+std::wstring fromPercentEncoding(const std::wstring &input) {
+  if (input.empty()) return {};
+
+  std::wstring_convert<std::codecvt_utf8<wchar_t> > utf8_conv;
+  std::string retVal;
+  retVal.reserve(input.length());
+  for (std::size_t ii = 0; ii < input.length(); ++ii) {
+    const auto c = input[ii];
+    if (c == L'%' && (ii + 2) < input.length()) {
+      const auto high = getIntValue(input[++ii]);
+      const auto low = getIntValue(input[++ii]);
+      retVal += static_cast<char>((high << 4) | low);
+    } else if (c == L'+') {
+      retVal += ' ';
+    } else if (static_cast<unsigned long>(c) < 0x80UL) {
+      retVal += static_cast<char>(c);
+    } else {
+      // A literal non-ASCII character must enter the byte buffer as UTF-8;
+      // narrowing it to a single char would corrupt the final decode.
+      retVal += utf8_conv.to_bytes(c);
+    }
+  }
+
+  return utf8_conv.from_bytes(retVal);
+}
+
+// True when `string` ends with `suffix` (an empty suffix always matches).
+bool endsWith(const std::wstring &string, const std::wstring &suffix) {
+  if (suffix.length() > string.length()) return false;
+
+  return string.compare(string.length() - suffix.length(), suffix.length(),
+                        suffix) == 0;
+}
+
+// Splits `string` at every character that appears in `separator` (the
+// separator is treated as a set of delimiter characters, as find_first_of
+// does). Empty fields are kept unless skipEmptyParts is true. With an empty
+// separator the whole string is returned as the single field.
+TStringVector splitString(const std::wstring &string,
+                          const std::wstring &separator, bool skipEmptyParts) {
+  if (separator.empty()) {
+    if (!skipEmptyParts || !string.empty()) return {string};
+    return {};
+  }
+
+  TStringVector strings;
+  // Start of the field currently being collected. Tracking the field start
+  // (rather than the previous separator position) keeps a separator at index 0
+  // from being mistaken for "no separator seen yet".
+  std::size_t fieldStart = 0;
+  while (true) {
+    const auto sepPos = string.find_first_of(separator, fieldStart);
+    const auto fieldLen =
+        (sepPos == std::wstring::npos) ? std::wstring::npos
+                                       : (sepPos - fieldStart);
+    const auto field = string.substr(fieldStart, fieldLen);
+    if (!skipEmptyParts || !field.empty()) strings.emplace_back(field);
+    if (sepPos == std::wstring::npos) break;
+    fieldStart = sepPos + 1;
+  }
+
+  return strings;
+}
+
+// Parses the whole string as a signed decimal integer. Returns 0 when the
+// string is not a number, has trailing characters, or is out of range.
+int64_t toInt(const std::wstring &string) {
+  int64_t retVal = 0;
+  try {
+    std::size_t lastPos{};
+    // stoll is at least 64 bits wide on every platform and keeps the sign;
+    // stoul is 32-bit on LLP64 targets and wraps negative input.
+    retVal = std::stoll(string, &lastPos);
+    if (lastPos != string.length()) return 0;
+  } catch (...) {
+    // Deliberate: invalid input is reported as 0, not as an exception.
+  }
+  return retVal;
+}
+
+}  // namespace NUtils
diff --git a/cpp17/diff_match_patch_utils.h b/cpp17/diff_match_patch_utils.h
new file mode 100644
index 0000000..7362a32
--- /dev/null
+++ b/cpp17/diff_match_patch_utils.h
@@ -0,0 +1,286 @@
+/*
+ * Diff Match and Patch
+ * Copyright 2018 The diff-match-patch Authors.
+ * https://github.com/google/diff-match-patch
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +#ifndef DIFF_MATCH_PATCH_UTILS_H +#define DIFF_MATCH_PATCH_UTILS_H +// +#include +#include +#include +namespace NUtils { +using TStringVector = std::vector; + +/* + * Utility functions to replace Qt built in methods + */ + +/** + * A safer version of std::wstring.mid(pos). This one returns "" instead of + * null when the postion equals the string length. + * @param str String to take a substring from. + * @param pos Position to start the substring from. + * @return Substring. + */ +std::wstring safeMid(const std::wstring &str, std::size_t pos); + +/** + * A safer version of std::wstring.mid(pos, len). This one returns "" instead + * of null when the postion equals the string length. + * @param str String to take a substring from. + * @param pos Position to start the substring from. + * @param len Length of substring. + * @return Substring. + */ +std::wstring safeMid(const std::wstring &str, std::size_t pos, std::size_t len); + +/** + * replaces QString::replace + * @param haystack String to replace all needles with to + * @param needle Substring to search for in the haystack + * @param to replacement string + * @return void. 
+ */
+void replace(std::wstring &haystack, const std::wstring &needle,
+             const std::wstring &to);
+
+/**
+ * Returns the html percent encoded equivalent of a single character.
+ * @param c the input Character to return the encoded string of
+ * @param exclude The list of chars that are NOT to be encoded
+ * @param include The list of chars that are to be encoded
+ * @return the encoded string
+ */
+std::wstring toPercentEncoding(wchar_t c,
+                               const std::wstring &exclude = std::wstring(),
+                               const std::wstring &include = std::wstring());
+
+/**
+ * Returns the html percent encoded equivalent of a whole string.
+ * @param input the input String to return the encoded string of
+ * @param exclude The list of chars that are NOT to be encoded
+ * @param include The list of chars that are to be encoded
+ * @return the encoded string
+ */
+std::wstring toPercentEncoding(const std::wstring &input,
+                               const std::wstring &exclude = std::wstring(),
+                               const std::wstring &include = std::wstring());
+
+/**
+ * Returns the string equivalent, removing any percent encoding and replacing
+ * it with the correct character.
+ * @param input the percent encoded String to decode
+ * @return the decoded string
+ */
+std::wstring fromPercentEncoding(const std::wstring &input);
+
+/**
+ * Returns the integer value of a hexadecimal digit character: '0'-'9' = 0-9,
+ * 'A'-'F' = 10-15, 'a'-'f' = 10-15.
+ * @param ch the character to return the integer value of
+ * @return the integer value of the character
+ */
+wchar_t getIntValue(wchar_t ch);
+
+/**
+ * Returns the integer value of the string.
+ * @param string the String to be converted to an integer
+ * @return the integer version, on an invalid input returns 0
+ */
+int64_t toInt(const std::wstring &string);
+
+/**
+ * Returns true if the string has the suffix.
+ * @param string the String to check to see if it ends with suffix
+ * @param suffix the String to see if the input string ends with
+ * @return True if the string ends with suffix
+ */
+bool endsWith(const std::wstring &string, const std::wstring &suffix);
+
+/**
+ * Returns a TStringVector of the string split by separator.
+ * @param string the String to be split
+ * @param separator the String to search in the input string to split on
+ * @param skipEmptyParts if true, empty values will be removed
+ * @return the split string
+ */
+TStringVector splitString(const std::wstring &string,
+                          const std::wstring &separator, bool skipEmptyParts);
+
+/**
+ * Removes a run of elements from the input vector and inserts the objects
+ * vector at the position where the run started.
+ * @param input The input vector to splice out from; it is modified in place
+ * @param start The position of the first item to remove from the input vector
+ * @param count How many values to remove from the input vector
+ * @param objects optional objects to insert where the previous objects were
+ * removed
+ * @return a vector holding the elements that were removed from input
+ *
+ * NOTE(review): no bounds check on start/count is visible here; presumably
+ * callers guarantee start + count does not exceed input.size() - confirm.
+ */
+template
+static std::vector Splice(std::vector &input, std::size_t start,
+                          std::size_t count,
+                          const std::vector &objects = {}) {
+  // Copy the elements that are about to be erased so they can be returned.
+  auto deletedRange =
+      std::vector({input.begin() + start, input.begin() + start + count});
+  // Erase the run, then insert the replacements at the same position.
+  input.erase(input.begin() + start, input.begin() + start + count);
+  input.insert(input.begin() + start, objects.begin(), objects.end());
+
+  return deletedRange;
+}
+
+/**
+ * Single-object convenience form of Splice: wraps object in a one-element
+ * vector and forwards to the vector overload above.
+ * @param input The input vector to splice out from; it is modified in place
+ * @param start The position of the first item to remove from the input vector
+ * @param count How many values to remove from the input vector
+ * @param object individual object to insert where the previous objects were
+ * removed
+ * @return a vector holding the elements that were removed from input
+ */
+template
+static std::vector Splice(std::vector &input, std::size_t start,
+                          std::size_t count, const T &object) {
+  return Splice(input, start, count, std::vector({object}));
+}
+
+/**
+ * Generic two-argument to_wstring. Its body asserts false and returns an
+ * empty string; presumably it is only reached for a type that has no
+ * dedicated overload or specialization below.
+ */
+template
+std::wstring to_wstring(const T & /*value*/, bool /*doubleQuoteEmpty*/) {
+  assert(false);
+  return {};
+}
+
+/**
+ * return the single character wide string for the
given character
+ * @param value the char to be converted to an wstring
+ * @param doubleQuoteEmpty, if the return value would be empty, return ""
+ * @return the character as a single character string
+ */
+inline std::wstring to_wstring(const char &value, bool doubleQuoteEmpty) {
+  // A NUL character counts as "empty": render it as two double-quote chars.
+  if (doubleQuoteEmpty && (value == 0)) return LR"("")";
+
+  return std::wstring(1, static_cast(value));
+}
+
+/**
+ * Renders a bool as L"true" or L"false". The second argument is ignored.
+ */
+template <>
+inline std::wstring to_wstring(const bool &value, bool /*doubleQuoteOnEmpty*/) {
+  std::wstring retVal = std::wstring(value ? L"true" : L"false");
+  return retVal;
+}
+
+/**
+ * Renders a std::vector element reference as L"true" or L"false". The second
+ * argument is ignored.
+ * NOTE(review): the vector's element type is not visible here; presumably
+ * this is the proxy reference of a vector of bool - confirm.
+ */
+template <>
+inline std::wstring to_wstring(const std::vector::reference &value,
+                               bool /*doubleQuoteOnEmpty*/) {
+  std::wstring retVal = std::wstring(value ? L"true" : L"false");
+  return retVal;
+}
+
+/**
+ * Converts a narrow multibyte string to a wide string with std::mbstowcs.
+ * @param string the narrow string to convert
+ * @param doubleQuoteEmpty if true and string is empty, return ""
+ * @return the converted wide string
+ *
+ * NOTE(review): std::mbstowcs returns static_cast<std::size_t>(-1) when it
+ * meets an invalid multibyte sequence, and that value is passed straight to
+ * resize() below. Confirm callers only pass text that is valid in the
+ * current C locale.
+ */
+template <>
+inline std::wstring to_wstring(const std::string &string,
+                               bool doubleQuoteEmpty) {
+  if (doubleQuoteEmpty && string.empty()) return LR"("")";
+
+  std::wstring wstring(string.size(),
+                       L' ');  // Overestimate number of code points.
+  wstring.resize(std::mbstowcs(&wstring[0], string.c_str(),
+                               string.size()));  // Shrink to fit.
+  return wstring;
+}
+
+/**
+ * Returns the wide character as a one-character string.
+ * @param value the wide character to convert
+ * @param doubleQuoteEmpty if true and value is 0, return ""
+ * @return the character as a single character string
+ */
+template <>
+inline std::wstring to_wstring(const wchar_t &value, bool doubleQuoteEmpty) {
+  if (doubleQuoteEmpty && (value == 0)) return LR"("")";
+
+  return std::wstring(1, value);
+}
+
+/**
+ * Casts the int and forwards to another to_wstring overload.
+ * NOTE(review): the cast's target type is not visible here, so the int is
+ * presumably rendered as a character rather than as decimal digits - confirm.
+ */
+template <>
+inline std::wstring to_wstring(const int &value, bool doubleQuoteEmpty) {
+  return to_wstring(static_cast(value), doubleQuoteEmpty);
+}
+
+/**
+ * Returns the wide string unchanged.
+ * @param value the string to return
+ * @param doubleQuoteEmpty if true and value is empty, return ""
+ * @return value, or "" as described above
+ */
+template <>
+inline std::wstring to_wstring(const std::wstring &value,
+                               bool doubleQuoteEmpty) {
+  if (doubleQuoteEmpty && value.empty()) return LR"("")";
+
+  return value;
+}
+
+/**
+ * Renders a vector as "(a, b, c)": each element is converted with
+ * to_wstring and the results are joined with ", " inside parentheses.
+ * @param values the elements to render
+ * @param doubleQuoteEmpty forwarded to the per-element conversion
+ * @return the parenthesised list; "()" for an empty vector
+ */
+template
+inline std::wstring to_wstring(const std::vector &values,
+                               bool doubleQuoteEmpty) {
+  std::wstring retVal = L"(";
+  bool first = true;
+  for (auto &&curr : values) {
+    // The separator goes before every element except the first.
+    if (!first) {
+      retVal += L", ";
+    }
+    retVal += to_wstring(curr, doubleQuoteEmpty);
+    first = false;
+  }
+  retVal += L")";
+  return retVal;
+}
+
+/**
+ * Renders the vector named boolArray: every element is converted with
+ * to_wstring and preceded by a tab character. Unlike the generic vector
+ * overload above, no parentheses or commas are added.
+ */
+template <>
+inline std::wstring to_wstring(const std::vector &boolArray,
+                               bool doubleQuoteOnEmpty) {
+  std::wstring retVal;
+  for (auto &&curr : boolArray) {
+    retVal += L"\t" + to_wstring(curr, doubleQuoteOnEmpty);
+  }
+  return retVal;
+}
+
+/**
+ * Concatenates the conversions of all elements of an initializer list, with
+ * no separator between them.
+ * @param values the elements to concatenate
+ * @param doubleQuoteEmpty if true and the list is empty, return the literal
+ *     below instead of an empty string
+ * @return the concatenated string
+ *
+ * NOTE(review): the enable_if condition selecting this overload is not
+ * visible here. Also, LR"(\"\")" is a raw literal, so its backslashes are
+ * kept and the result is the four characters \"\" - the other overloads
+ * return the two characters "". Confirm whether that difference is intended.
+ */
+template
+inline typename std::enable_if_t, std::wstring>
+to_wstring(const std::initializer_list &values,
+           bool doubleQuoteEmpty = false) {
+  if (doubleQuoteEmpty && (values.size() == 0)) return LR"(\"\")";
+
+  std::wstring retVal;
+  for (auto &&curr : values) {
+    retVal += to_wstring(curr, false);
+  }
+  return retVal;
+}
+
+/**
+ * Renders an initializer list as "(a, b, c)", in the same format as the
+ * generic vector overload.
+ * @param values the elements to render
+ * @param doubleQuoteEmpty forwarded to the per-element conversion
+ * @return the parenthesised list; "()" for an empty list
+ *
+ * NOTE(review): the enable_if condition selecting this overload is not
+ * visible here.
+ */
+template
+inline typename std::enable_if_t, std::wstring>
+to_wstring(const std::initializer_list &values,
+           bool doubleQuoteEmpty = false) {
+  std::wstring retVal = L"(";
+  bool first = true;
+  for (auto &&curr : values) {
+    // The separator goes before every element except the first.
+    if (!first) {
+      retVal += L", ";
+    }
+    retVal += to_wstring(curr, doubleQuoteEmpty);
+    first = false;
+  }
+  retVal += L")";
+  return retVal;
+}
+
+/**
+ * One-argument convenience form: same as to_wstring(value, false).
+ */
+template
+std::wstring to_wstring(const T &value) {
+  return to_wstring(value, false);
+}
+};  // namespace NUtils
+
+#endif