diff --git a/java/src/name/fraser/neil/plaintext/diff_match_patch.java b/java/src/name/fraser/neil/plaintext/diff_match_patch.java index 9d07867..3e289fa 100644 --- a/java/src/name/fraser/neil/plaintext/diff_match_patch.java +++ b/java/src/name/fraser/neil/plaintext/diff_match_patch.java @@ -19,6 +19,7 @@ package name.fraser.neil.plaintext; import java.io.UnsupportedEncodingException; +import java.lang.Character; import java.net.URLDecoder; import java.net.URLEncoder; import java.util.*; @@ -1429,7 +1430,22 @@ public int diff_levenshtein(List diffs) { */ public String diff_toDelta(List diffs) { StringBuilder text = new StringBuilder(); + char lastEnd = 0; for (Diff aDiff : diffs) { + + char thisTop = aDiff.text.charAt(0); + char thisEnd = aDiff.text.charAt(aDiff.text.length() - 1); + + if (Character.isHighSurrogate(thisEnd)) { + aDiff.text = aDiff.text.substring(0, aDiff.text.length() - 1); + } + + if (0 != lastEnd && Character.isHighSurrogate(lastEnd) && Character.isLowSurrogate(thisTop)) { + aDiff.text = lastEnd + aDiff.text; + } + + lastEnd = thisEnd; + switch (aDiff.operation) { case INSERT: try { diff --git a/java/tests/name/fraser/neil/plaintext/diff_match_patch_test.java b/java/tests/name/fraser/neil/plaintext/diff_match_patch_test.java index 2f38793..aef98ce 100644 --- a/java/tests/name/fraser/neil/plaintext/diff_match_patch_test.java +++ b/java/tests/name/fraser/neil/plaintext/diff_match_patch_test.java @@ -424,6 +424,10 @@ public static void testDiffDelta() { assertEquals("diff_fromDelta: Unicode.", diffs, dmp.diff_fromDelta(text1, delta)); + diffs = diffList(new Diff(EQUAL, "\ud83d\ude4b\ud83d"), new Diff(INSERT, "\ude4c\ud83d"), new Diff(EQUAL, "\ude4b")); + delta = dmp.diff_toDelta(diffs); + assertEquals("diff_toDelta: Surrogate Pairs.", "=2\t+%F0%9F%99%8C\t=2", delta); + // Verify pool of unchanged characters. diffs = diffList(new Diff(INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ")); String text2 = dmp.diff_text2(diffs);