From 143d61d5db87c07c8a15e3aad0af2847a27b064d Mon Sep 17 00:00:00 2001
From: Dennis Snell <dmsnell@xkq.io>
Date: Tue, 30 Jan 2024 16:51:42 -0700
Subject: [PATCH 1/5] JavaScript: Stop breaking surrogate pairs in toDelta()

Resolves Google/diff-match-patch#69 for JavaScript

Sometimes we can find a common prefix that runs into the middle of a
surrogate pair and we split that pair when building our diff groups.

This is fine as long as we are operating on UTF-16 code units. It
becomes problematic when we start trying to treat those substrings as
valid Unicode (or UTF-8) sequences.

When we pass these split groups into `toDelta()` we do just that and the
library crashes. In this patch we're post-processing the diff groups
before encoding them to make sure that we un-split the surrogate pairs.

The post-processed diffs should produce the same output when applying
the diffs. The diff string itself will be different but shouldn't change
that much - only by a single character at surrogate boundaries.
---
 javascript/diff_match_patch.js              | 109 ++++++------
 javascript/diff_match_patch_uncompressed.js | 178 +++++++++++++++++++-
 javascript/tests/diff_match_patch_test.js   | 130 ++++++++++++++
 3 files changed, 364 insertions(+), 53 deletions(-)

diff --git a/javascript/diff_match_patch.js b/javascript/diff_match_patch.js
index 2fe320a1..56b12cd8 100644
--- a/javascript/diff_match_patch.js
+++ b/javascript/diff_match_patch.js
@@ -1,55 +1,60 @@
 var diff_match_patch=function(){this.Diff_Timeout=1;this.Diff_EditCost=4;this.Match_Threshold=.5;this.Match_Distance=1E3;this.Patch_DeleteThreshold=.5;this.Patch_Margin=4;this.Match_MaxBits=32},DIFF_DELETE=-1,DIFF_INSERT=1,DIFF_EQUAL=0;diff_match_patch.Diff=function(a,b){this[0]=a;this[1]=b};diff_match_patch.Diff.prototype.length=2;diff_match_patch.Diff.prototype.toString=function(){return this[0]+","+this[1]};
-diff_match_patch.prototype.diff_main=function(a,b,c,d){"undefined"==typeof d&&(d=0>=this.Diff_Timeout?Number.MAX_VALUE:(new Date).getTime()+1E3*this.Diff_Timeout);if(null==a||null==b)throw Error("Null input. (diff_main)");if(a==b)return a?[new diff_match_patch.Diff(DIFF_EQUAL,a)]:[];"undefined"==typeof c&&(c=!0);var e=c,f=this.diff_commonPrefix(a,b);c=a.substring(0,f);a=a.substring(f);b=b.substring(f);f=this.diff_commonSuffix(a,b);var g=a.substring(a.length-f);a=a.substring(0,a.length-f);b=b.substring(0,
-b.length-f);a=this.diff_compute_(a,b,e,d);c&&a.unshift(new diff_match_patch.Diff(DIFF_EQUAL,c));g&&a.push(new diff_match_patch.Diff(DIFF_EQUAL,g));this.diff_cleanupMerge(a);return a};
-diff_match_patch.prototype.diff_compute_=function(a,b,c,d){if(!a)return[new diff_match_patch.Diff(DIFF_INSERT,b)];if(!b)return[new diff_match_patch.Diff(DIFF_DELETE,a)];var e=a.length>b.length?a:b,f=a.length>b.length?b:a,g=e.indexOf(f);return-1!=g?(c=[new diff_match_patch.Diff(DIFF_INSERT,e.substring(0,g)),new diff_match_patch.Diff(DIFF_EQUAL,f),new diff_match_patch.Diff(DIFF_INSERT,e.substring(g+f.length))],a.length>b.length&&(c[0][0]=c[2][0]=DIFF_DELETE),c):1==f.length?[new diff_match_patch.Diff(DIFF_DELETE,
-a),new diff_match_patch.Diff(DIFF_INSERT,b)]:(e=this.diff_halfMatch_(a,b))?(b=e[1],f=e[3],a=e[4],e=this.diff_main(e[0],e[2],c,d),c=this.diff_main(b,f,c,d),e.concat([new diff_match_patch.Diff(DIFF_EQUAL,a)],c)):c&&100<a.length&&100<b.length?this.diff_lineMode_(a,b,d):this.diff_bisect_(a,b,d)};
-diff_match_patch.prototype.diff_lineMode_=function(a,b,c){var d=this.diff_linesToChars_(a,b);a=d.chars1;b=d.chars2;d=d.lineArray;a=this.diff_main(a,b,!1,c);this.diff_charsToLines_(a,d);this.diff_cleanupSemantic(a);a.push(new diff_match_patch.Diff(DIFF_EQUAL,""));for(var e=d=b=0,f="",g="";b<a.length;){switch(a[b][0]){case DIFF_INSERT:e++;g+=a[b][1];break;case DIFF_DELETE:d++;f+=a[b][1];break;case DIFF_EQUAL:if(1<=d&&1<=e){a.splice(b-d-e,d+e);b=b-d-e;d=this.diff_main(f,g,!1,c);for(e=d.length-1;0<=e;e--)a.splice(b,
-0,d[e]);b+=d.length}d=e=0;g=f=""}b++}a.pop();return a};
-diff_match_patch.prototype.diff_bisect_=function(a,b,c){for(var d=a.length,e=b.length,f=Math.ceil((d+e)/2),g=2*f,h=Array(g),l=Array(g),k=0;k<g;k++)h[k]=-1,l[k]=-1;h[f+1]=0;l[f+1]=0;k=d-e;for(var m=0!=k%2,p=0,x=0,w=0,q=0,t=0;t<f&&!((new Date).getTime()>c);t++){for(var v=-t+p;v<=t-x;v+=2){var n=f+v;var r=v==-t||v!=t&&h[n-1]<h[n+1]?h[n+1]:h[n-1]+1;for(var y=r-v;r<d&&y<e&&a.charAt(r)==b.charAt(y);)r++,y++;h[n]=r;if(r>d)x+=2;else if(y>e)p+=2;else if(m&&(n=f+k-v,0<=n&&n<g&&-1!=l[n])){var u=d-l[n];if(r>=
-u)return this.diff_bisectSplit_(a,b,r,y,c)}}for(v=-t+w;v<=t-q;v+=2){n=f+v;u=v==-t||v!=t&&l[n-1]<l[n+1]?l[n+1]:l[n-1]+1;for(r=u-v;u<d&&r<e&&a.charAt(d-u-1)==b.charAt(e-r-1);)u++,r++;l[n]=u;if(u>d)q+=2;else if(r>e)w+=2;else if(!m&&(n=f+k-v,0<=n&&n<g&&-1!=h[n]&&(r=h[n],y=f+r-n,u=d-u,r>=u)))return this.diff_bisectSplit_(a,b,r,y,c)}}return[new diff_match_patch.Diff(DIFF_DELETE,a),new diff_match_patch.Diff(DIFF_INSERT,b)]};
-diff_match_patch.prototype.diff_bisectSplit_=function(a,b,c,d,e){var f=a.substring(0,c),g=b.substring(0,d);a=a.substring(c);b=b.substring(d);f=this.diff_main(f,g,!1,e);e=this.diff_main(a,b,!1,e);return f.concat(e)};
-diff_match_patch.prototype.diff_linesToChars_=function(a,b){function c(a){for(var b="",c=0,g=-1,h=d.length;g<a.length-1;){g=a.indexOf("\n",c);-1==g&&(g=a.length-1);var l=a.substring(c,g+1);(e.hasOwnProperty?e.hasOwnProperty(l):void 0!==e[l])?b+=String.fromCharCode(e[l]):(h==f&&(l=a.substring(c),g=a.length),b+=String.fromCharCode(h),e[l]=h,d[h++]=l);c=g+1}return b}var d=[],e={};d[0]="";var f=4E4,g=c(a);f=65535;var h=c(b);return{chars1:g,chars2:h,lineArray:d}};
-diff_match_patch.prototype.diff_charsToLines_=function(a,b){for(var c=0;c<a.length;c++){for(var d=a[c][1],e=[],f=0;f<d.length;f++)e[f]=b[d.charCodeAt(f)];a[c][1]=e.join("")}};diff_match_patch.prototype.diff_commonPrefix=function(a,b){if(!a||!b||a.charAt(0)!=b.charAt(0))return 0;for(var c=0,d=Math.min(a.length,b.length),e=d,f=0;c<e;)a.substring(f,e)==b.substring(f,e)?f=c=e:d=e,e=Math.floor((d-c)/2+c);return e};
-diff_match_patch.prototype.diff_commonSuffix=function(a,b){if(!a||!b||a.charAt(a.length-1)!=b.charAt(b.length-1))return 0;for(var c=0,d=Math.min(a.length,b.length),e=d,f=0;c<e;)a.substring(a.length-e,a.length-f)==b.substring(b.length-e,b.length-f)?f=c=e:d=e,e=Math.floor((d-c)/2+c);return e};
-diff_match_patch.prototype.diff_commonOverlap_=function(a,b){var c=a.length,d=b.length;if(0==c||0==d)return 0;c>d?a=a.substring(c-d):c<d&&(b=b.substring(0,c));c=Math.min(c,d);if(a==b)return c;d=0;for(var e=1;;){var f=a.substring(c-e);f=b.indexOf(f);if(-1==f)return d;e+=f;if(0==f||a.substring(c-e)==b.substring(0,e))d=e,e++}};
-diff_match_patch.prototype.diff_halfMatch_=function(a,b){function c(a,b,c){for(var d=a.substring(c,c+Math.floor(a.length/4)),e=-1,g="",h,k,l,m;-1!=(e=b.indexOf(d,e+1));){var p=f.diff_commonPrefix(a.substring(c),b.substring(e)),u=f.diff_commonSuffix(a.substring(0,c),b.substring(0,e));g.length<u+p&&(g=b.substring(e-u,e)+b.substring(e,e+p),h=a.substring(0,c-u),k=a.substring(c+p),l=b.substring(0,e-u),m=b.substring(e+p))}return 2*g.length>=a.length?[h,k,l,m,g]:null}if(0>=this.Diff_Timeout)return null;
-var d=a.length>b.length?a:b,e=a.length>b.length?b:a;if(4>d.length||2*e.length<d.length)return null;var f=this,g=c(d,e,Math.ceil(d.length/4));d=c(d,e,Math.ceil(d.length/2));if(g||d)g=d?g?g[4].length>d[4].length?g:d:d:g;else return null;if(a.length>b.length){d=g[0];e=g[1];var h=g[2];var l=g[3]}else h=g[0],l=g[1],d=g[2],e=g[3];return[d,e,h,l,g[4]]};
-diff_match_patch.prototype.diff_cleanupSemantic=function(a){for(var b=!1,c=[],d=0,e=null,f=0,g=0,h=0,l=0,k=0;f<a.length;)a[f][0]==DIFF_EQUAL?(c[d++]=f,g=l,h=k,k=l=0,e=a[f][1]):(a[f][0]==DIFF_INSERT?l+=a[f][1].length:k+=a[f][1].length,e&&e.length<=Math.max(g,h)&&e.length<=Math.max(l,k)&&(a.splice(c[d-1],0,new diff_match_patch.Diff(DIFF_DELETE,e)),a[c[d-1]+1][0]=DIFF_INSERT,d--,d--,f=0<d?c[d-1]:-1,k=l=h=g=0,e=null,b=!0)),f++;b&&this.diff_cleanupMerge(a);this.diff_cleanupSemanticLossless(a);for(f=1;f<
-a.length;){if(a[f-1][0]==DIFF_DELETE&&a[f][0]==DIFF_INSERT){b=a[f-1][1];c=a[f][1];d=this.diff_commonOverlap_(b,c);e=this.diff_commonOverlap_(c,b);if(d>=e){if(d>=b.length/2||d>=c.length/2)a.splice(f,0,new diff_match_patch.Diff(DIFF_EQUAL,c.substring(0,d))),a[f-1][1]=b.substring(0,b.length-d),a[f+1][1]=c.substring(d),f++}else if(e>=b.length/2||e>=c.length/2)a.splice(f,0,new diff_match_patch.Diff(DIFF_EQUAL,b.substring(0,e))),a[f-1][0]=DIFF_INSERT,a[f-1][1]=c.substring(0,c.length-e),a[f+1][0]=DIFF_DELETE,
+diff_match_patch.prototype.diff_main=function(a,b,d,c){"undefined"==typeof c&&(c=0>=this.Diff_Timeout?Number.MAX_VALUE:(new Date).getTime()+1E3*this.Diff_Timeout);if(null==a||null==b)throw Error("Null input. (diff_main)");if(a==b)return a?[new diff_match_patch.Diff(DIFF_EQUAL,a)]:[];"undefined"==typeof d&&(d=!0);var e=d,f=this.diff_commonPrefix(a,b);d=a.substring(0,f);a=a.substring(f);b=b.substring(f);f=this.diff_commonSuffix(a,b);var g=a.substring(a.length-f);a=a.substring(0,a.length-f);b=b.substring(0,
+b.length-f);a=this.diff_compute_(a,b,e,c);d&&a.unshift(new diff_match_patch.Diff(DIFF_EQUAL,d));g&&a.push(new diff_match_patch.Diff(DIFF_EQUAL,g));this.diff_cleanupMerge(a);return a};
+diff_match_patch.prototype.diff_compute_=function(a,b,d,c){if(!a)return[new diff_match_patch.Diff(DIFF_INSERT,b)];if(!b)return[new diff_match_patch.Diff(DIFF_DELETE,a)];var e=a.length>b.length?a:b,f=a.length>b.length?b:a,g=e.indexOf(f);return-1!=g?(d=[new diff_match_patch.Diff(DIFF_INSERT,e.substring(0,g)),new diff_match_patch.Diff(DIFF_EQUAL,f),new diff_match_patch.Diff(DIFF_INSERT,e.substring(g+f.length))],a.length>b.length&&(d[0][0]=d[2][0]=DIFF_DELETE),d):1==f.length?[new diff_match_patch.Diff(DIFF_DELETE,
+a),new diff_match_patch.Diff(DIFF_INSERT,b)]:(e=this.diff_halfMatch_(a,b))?(b=e[1],f=e[3],a=e[4],e=this.diff_main(e[0],e[2],d,c),d=this.diff_main(b,f,d,c),e.concat([new diff_match_patch.Diff(DIFF_EQUAL,a)],d)):d&&100<a.length&&100<b.length?this.diff_lineMode_(a,b,c):this.diff_bisect_(a,b,c)};
+diff_match_patch.prototype.diff_lineMode_=function(a,b,d){var c=this.diff_linesToChars_(a,b);a=c.chars1;b=c.chars2;c=c.lineArray;a=this.diff_main(a,b,!1,d);this.diff_charsToLines_(a,c);this.diff_cleanupSemantic(a);a.push(new diff_match_patch.Diff(DIFF_EQUAL,""));for(var e=c=b=0,f="",g="";b<a.length;){switch(a[b][0]){case DIFF_INSERT:e++;g+=a[b][1];break;case DIFF_DELETE:c++;f+=a[b][1];break;case DIFF_EQUAL:if(1<=c&&1<=e){a.splice(b-c-e,c+e);b=b-c-e;c=this.diff_main(f,g,!1,d);for(e=c.length-1;0<=e;e--)a.splice(b,
+0,c[e]);b+=c.length}c=e=0;g=f=""}b++}a.pop();return a};
+diff_match_patch.prototype.diff_bisect_=function(a,b,d){for(var c=a.length,e=b.length,f=Math.ceil((c+e)/2),g=2*f,h=Array(g),l=Array(g),k=0;k<g;k++)h[k]=-1,l[k]=-1;h[f+1]=0;l[f+1]=0;k=c-e;for(var m=0!=k%2,n=0,w=0,p=0,r=0,u=0;u<f&&!((new Date).getTime()>d);u++){for(var v=-u+n;v<=u-w;v+=2){var q=f+v;var t=v==-u||v!=u&&h[q-1]<h[q+1]?h[q+1]:h[q-1]+1;for(var y=t-v;t<c&&y<e&&a.charAt(t)==b.charAt(y);)t++,y++;h[q]=t;if(t>c)w+=2;else if(y>e)n+=2;else if(m&&(q=f+k-v,0<=q&&q<g&&-1!=l[q])){var x=c-l[q];if(t>=
+x)return this.diff_bisectSplit_(a,b,t,y,d)}}for(v=-u+p;v<=u-r;v+=2){q=f+v;x=v==-u||v!=u&&l[q-1]<l[q+1]?l[q+1]:l[q-1]+1;for(t=x-v;x<c&&t<e&&a.charAt(c-x-1)==b.charAt(e-t-1);)x++,t++;l[q]=x;if(x>c)r+=2;else if(t>e)p+=2;else if(!m&&(q=f+k-v,0<=q&&q<g&&-1!=h[q]&&(t=h[q],y=f+t-q,x=c-x,t>=x)))return this.diff_bisectSplit_(a,b,t,y,d)}}return[new diff_match_patch.Diff(DIFF_DELETE,a),new diff_match_patch.Diff(DIFF_INSERT,b)]};
+diff_match_patch.prototype.diff_bisectSplit_=function(a,b,d,c,e){var f=a.substring(0,d),g=b.substring(0,c);a=a.substring(d);b=b.substring(c);f=this.diff_main(f,g,!1,e);e=this.diff_main(a,b,!1,e);return f.concat(e)};
+diff_match_patch.prototype.diff_linesToChars_=function(a,b){function d(l){for(var k="",m=0,n=-1,w=c.length;n<l.length-1;){n=l.indexOf("\n",m);-1==n&&(n=l.length-1);var p=l.substring(m,n+1);(e.hasOwnProperty?e.hasOwnProperty(p):void 0!==e[p])?k+=String.fromCharCode(e[p]):(w==f&&(p=l.substring(m),n=l.length),k+=String.fromCharCode(w),e[p]=w,c[w++]=p);m=n+1}return k}var c=[],e={};c[0]="";var f=4E4,g=d(a);f=65535;var h=d(b);return{chars1:g,chars2:h,lineArray:c}};
+diff_match_patch.prototype.diff_charsToLines_=function(a,b){for(var d=0;d<a.length;d++){for(var c=a[d][1],e=[],f=0;f<c.length;f++)e[f]=b[c.charCodeAt(f)];a[d][1]=e.join("")}};diff_match_patch.prototype.diff_commonPrefix=function(a,b){if(!a||!b||a.charAt(0)!=b.charAt(0))return 0;for(var d=0,c=Math.min(a.length,b.length),e=c,f=0;d<e;)a.substring(f,e)==b.substring(f,e)?f=d=e:c=e,e=Math.floor((c-d)/2+d);return e};
+diff_match_patch.prototype.diff_commonSuffix=function(a,b){if(!a||!b||a.charAt(a.length-1)!=b.charAt(b.length-1))return 0;for(var d=0,c=Math.min(a.length,b.length),e=c,f=0;d<e;)a.substring(a.length-e,a.length-f)==b.substring(b.length-e,b.length-f)?f=d=e:c=e,e=Math.floor((c-d)/2+d);return e};
+diff_match_patch.prototype.diff_commonOverlap_=function(a,b){var d=a.length,c=b.length;if(0==d||0==c)return 0;d>c?a=a.substring(d-c):d<c&&(b=b.substring(0,d));d=Math.min(d,c);if(a==b)return d;c=0;for(var e=1;;){var f=a.substring(d-e);f=b.indexOf(f);if(-1==f)return c;e+=f;if(0==f||a.substring(d-e)==b.substring(0,e))c=e,e++}};
+diff_match_patch.prototype.diff_halfMatch_=function(a,b){function d(k,m,n){for(var w=k.substring(n,n+Math.floor(k.length/4)),p=-1,r="",u,v,q,t;-1!=(p=m.indexOf(w,p+1));){var y=f.diff_commonPrefix(k.substring(n),m.substring(p)),x=f.diff_commonSuffix(k.substring(0,n),m.substring(0,p));r.length<x+y&&(r=m.substring(p-x,p)+m.substring(p,p+y),u=k.substring(0,n-x),v=k.substring(n+y),q=m.substring(0,p-x),t=m.substring(p+y))}return 2*r.length>=k.length?[u,v,q,t,r]:null}if(0>=this.Diff_Timeout)return null;
+var c=a.length>b.length?a:b,e=a.length>b.length?b:a;if(4>c.length||2*e.length<c.length)return null;var f=this,g=d(c,e,Math.ceil(c.length/4));c=d(c,e,Math.ceil(c.length/2));if(g||c)g=c?g?g[4].length>c[4].length?g:c:c:g;else return null;if(a.length>b.length){c=g[0];e=g[1];var h=g[2];var l=g[3]}else h=g[0],l=g[1],c=g[2],e=g[3];return[c,e,h,l,g[4]]};
+diff_match_patch.prototype.diff_cleanupSemantic=function(a){for(var b=!1,d=[],c=0,e=null,f=0,g=0,h=0,l=0,k=0;f<a.length;)a[f][0]==DIFF_EQUAL?(d[c++]=f,g=l,h=k,k=l=0,e=a[f][1]):(a[f][0]==DIFF_INSERT?l+=a[f][1].length:k+=a[f][1].length,e&&e.length<=Math.max(g,h)&&e.length<=Math.max(l,k)&&(a.splice(d[c-1],0,new diff_match_patch.Diff(DIFF_DELETE,e)),a[d[c-1]+1][0]=DIFF_INSERT,c--,c--,f=0<c?d[c-1]:-1,k=l=h=g=0,e=null,b=!0)),f++;b&&this.diff_cleanupMerge(a);this.diff_cleanupSemanticLossless(a);for(f=1;f<
+a.length;){if(a[f-1][0]==DIFF_DELETE&&a[f][0]==DIFF_INSERT){b=a[f-1][1];d=a[f][1];c=this.diff_commonOverlap_(b,d);e=this.diff_commonOverlap_(d,b);if(c>=e){if(c>=b.length/2||c>=d.length/2)a.splice(f,0,new diff_match_patch.Diff(DIFF_EQUAL,d.substring(0,c))),a[f-1][1]=b.substring(0,b.length-c),a[f+1][1]=d.substring(c),f++}else if(e>=b.length/2||e>=d.length/2)a.splice(f,0,new diff_match_patch.Diff(DIFF_EQUAL,b.substring(0,e))),a[f-1][0]=DIFF_INSERT,a[f-1][1]=d.substring(0,d.length-e),a[f+1][0]=DIFF_DELETE,
 a[f+1][1]=b.substring(e),f++;f++}f++}};
-diff_match_patch.prototype.diff_cleanupSemanticLossless=function(a){function b(a,b){if(!a||!b)return 6;var c=a.charAt(a.length-1),d=b.charAt(0),e=c.match(diff_match_patch.nonAlphaNumericRegex_),f=d.match(diff_match_patch.nonAlphaNumericRegex_),g=e&&c.match(diff_match_patch.whitespaceRegex_),h=f&&d.match(diff_match_patch.whitespaceRegex_);c=g&&c.match(diff_match_patch.linebreakRegex_);d=h&&d.match(diff_match_patch.linebreakRegex_);var k=c&&a.match(diff_match_patch.blanklineEndRegex_),l=d&&b.match(diff_match_patch.blanklineStartRegex_);
-return k||l?5:c||d?4:e&&!g&&h?3:g||h?2:e||f?1:0}for(var c=1;c<a.length-1;){if(a[c-1][0]==DIFF_EQUAL&&a[c+1][0]==DIFF_EQUAL){var d=a[c-1][1],e=a[c][1],f=a[c+1][1],g=this.diff_commonSuffix(d,e);if(g){var h=e.substring(e.length-g);d=d.substring(0,d.length-g);e=h+e.substring(0,e.length-g);f=h+f}g=d;h=e;for(var l=f,k=b(d,e)+b(e,f);e.charAt(0)===f.charAt(0);){d+=e.charAt(0);e=e.substring(1)+f.charAt(0);f=f.substring(1);var m=b(d,e)+b(e,f);m>=k&&(k=m,g=d,h=e,l=f)}a[c-1][1]!=g&&(g?a[c-1][1]=g:(a.splice(c-
-1,1),c--),a[c][1]=h,l?a[c+1][1]=l:(a.splice(c+1,1),c--))}c++}};diff_match_patch.nonAlphaNumericRegex_=/[^a-zA-Z0-9]/;diff_match_patch.whitespaceRegex_=/\s/;diff_match_patch.linebreakRegex_=/[\r\n]/;diff_match_patch.blanklineEndRegex_=/\n\r?\n$/;diff_match_patch.blanklineStartRegex_=/^\r?\n\r?\n/;
-diff_match_patch.prototype.diff_cleanupEfficiency=function(a){for(var b=!1,c=[],d=0,e=null,f=0,g=!1,h=!1,l=!1,k=!1;f<a.length;)a[f][0]==DIFF_EQUAL?(a[f][1].length<this.Diff_EditCost&&(l||k)?(c[d++]=f,g=l,h=k,e=a[f][1]):(d=0,e=null),l=k=!1):(a[f][0]==DIFF_DELETE?k=!0:l=!0,e&&(g&&h&&l&&k||e.length<this.Diff_EditCost/2&&3==g+h+l+k)&&(a.splice(c[d-1],0,new diff_match_patch.Diff(DIFF_DELETE,e)),a[c[d-1]+1][0]=DIFF_INSERT,d--,e=null,g&&h?(l=k=!0,d=0):(d--,f=0<d?c[d-1]:-1,l=k=!1),b=!0)),f++;b&&this.diff_cleanupMerge(a)};
-diff_match_patch.prototype.diff_cleanupMerge=function(a){a.push(new diff_match_patch.Diff(DIFF_EQUAL,""));for(var b=0,c=0,d=0,e="",f="",g;b<a.length;)switch(a[b][0]){case DIFF_INSERT:d++;f+=a[b][1];b++;break;case DIFF_DELETE:c++;e+=a[b][1];b++;break;case DIFF_EQUAL:1<c+d?(0!==c&&0!==d&&(g=this.diff_commonPrefix(f,e),0!==g&&(0<b-c-d&&a[b-c-d-1][0]==DIFF_EQUAL?a[b-c-d-1][1]+=f.substring(0,g):(a.splice(0,0,new diff_match_patch.Diff(DIFF_EQUAL,f.substring(0,g))),b++),f=f.substring(g),e=e.substring(g)),
-g=this.diff_commonSuffix(f,e),0!==g&&(a[b][1]=f.substring(f.length-g)+a[b][1],f=f.substring(0,f.length-g),e=e.substring(0,e.length-g))),b-=c+d,a.splice(b,c+d),e.length&&(a.splice(b,0,new diff_match_patch.Diff(DIFF_DELETE,e)),b++),f.length&&(a.splice(b,0,new diff_match_patch.Diff(DIFF_INSERT,f)),b++),b++):0!==b&&a[b-1][0]==DIFF_EQUAL?(a[b-1][1]+=a[b][1],a.splice(b,1)):b++,c=d=0,f=e=""}""===a[a.length-1][1]&&a.pop();c=!1;for(b=1;b<a.length-1;)a[b-1][0]==DIFF_EQUAL&&a[b+1][0]==DIFF_EQUAL&&(a[b][1].substring(a[b][1].length-
-a[b-1][1].length)==a[b-1][1]?(a[b][1]=a[b-1][1]+a[b][1].substring(0,a[b][1].length-a[b-1][1].length),a[b+1][1]=a[b-1][1]+a[b+1][1],a.splice(b-1,1),c=!0):a[b][1].substring(0,a[b+1][1].length)==a[b+1][1]&&(a[b-1][1]+=a[b+1][1],a[b][1]=a[b][1].substring(a[b+1][1].length)+a[b+1][1],a.splice(b+1,1),c=!0)),b++;c&&this.diff_cleanupMerge(a)};
-diff_match_patch.prototype.diff_xIndex=function(a,b){var c=0,d=0,e=0,f=0,g;for(g=0;g<a.length;g++){a[g][0]!==DIFF_INSERT&&(c+=a[g][1].length);a[g][0]!==DIFF_DELETE&&(d+=a[g][1].length);if(c>b)break;e=c;f=d}return a.length!=g&&a[g][0]===DIFF_DELETE?f:f+(b-e)};
-diff_match_patch.prototype.diff_prettyHtml=function(a){for(var b=[],c=/&/g,d=/</g,e=/>/g,f=/\n/g,g=0;g<a.length;g++){var h=a[g][0],l=a[g][1].replace(c,"&amp;").replace(d,"&lt;").replace(e,"&gt;").replace(f,"&para;<br>");switch(h){case DIFF_INSERT:b[g]='<ins style="background:#e6ffe6;">'+l+"</ins>";break;case DIFF_DELETE:b[g]='<del style="background:#ffe6e6;">'+l+"</del>";break;case DIFF_EQUAL:b[g]="<span>"+l+"</span>"}}return b.join("")};
-diff_match_patch.prototype.diff_text1=function(a){for(var b=[],c=0;c<a.length;c++)a[c][0]!==DIFF_INSERT&&(b[c]=a[c][1]);return b.join("")};diff_match_patch.prototype.diff_text2=function(a){for(var b=[],c=0;c<a.length;c++)a[c][0]!==DIFF_DELETE&&(b[c]=a[c][1]);return b.join("")};
-diff_match_patch.prototype.diff_levenshtein=function(a){for(var b=0,c=0,d=0,e=0;e<a.length;e++){var f=a[e][1];switch(a[e][0]){case DIFF_INSERT:c+=f.length;break;case DIFF_DELETE:d+=f.length;break;case DIFF_EQUAL:b+=Math.max(c,d),d=c=0}}return b+=Math.max(c,d)};
-diff_match_patch.prototype.diff_toDelta=function(a){for(var b=[],c=0;c<a.length;c++)switch(a[c][0]){case DIFF_INSERT:b[c]="+"+encodeURI(a[c][1]);break;case DIFF_DELETE:b[c]="-"+a[c][1].length;break;case DIFF_EQUAL:b[c]="="+a[c][1].length}return b.join("\t").replace(/%20/g," ")};
-diff_match_patch.prototype.diff_fromDelta=function(a,b){for(var c=[],d=0,e=0,f=b.split(/\t/g),g=0;g<f.length;g++){var h=f[g].substring(1);switch(f[g].charAt(0)){case "+":try{c[d++]=new diff_match_patch.Diff(DIFF_INSERT,decodeURI(h))}catch(k){throw Error("Illegal escape in diff_fromDelta: "+h);}break;case "-":case "=":var l=parseInt(h,10);if(isNaN(l)||0>l)throw Error("Invalid number in diff_fromDelta: "+h);h=a.substring(e,e+=l);"="==f[g].charAt(0)?c[d++]=new diff_match_patch.Diff(DIFF_EQUAL,h):c[d++]=
-new diff_match_patch.Diff(DIFF_DELETE,h);break;default:if(f[g])throw Error("Invalid diff operation in diff_fromDelta: "+f[g]);}}if(e!=a.length)throw Error("Delta length ("+e+") does not equal source text length ("+a.length+").");return c};diff_match_patch.prototype.match_main=function(a,b,c){if(null==a||null==b||null==c)throw Error("Null input. (match_main)");c=Math.max(0,Math.min(c,a.length));return a==b?0:a.length?a.substring(c,c+b.length)==b?c:this.match_bitap_(a,b,c):-1};
-diff_match_patch.prototype.match_bitap_=function(a,b,c){function d(a,d){var e=a/b.length,g=Math.abs(c-d);return f.Match_Distance?e+g/f.Match_Distance:g?1:e}if(b.length>this.Match_MaxBits)throw Error("Pattern too long for this browser.");var e=this.match_alphabet_(b),f=this,g=this.Match_Threshold,h=a.indexOf(b,c);-1!=h&&(g=Math.min(d(0,h),g),h=a.lastIndexOf(b,c+b.length),-1!=h&&(g=Math.min(d(0,h),g)));var l=1<<b.length-1;h=-1;for(var k,m,p=b.length+a.length,x,w=0;w<b.length;w++){k=0;for(m=p;k<m;)d(w,
-c+m)<=g?k=m:p=m,m=Math.floor((p-k)/2+k);p=m;k=Math.max(1,c-m+1);var q=Math.min(c+m,a.length)+b.length;m=Array(q+2);for(m[q+1]=(1<<w)-1;q>=k;q--){var t=e[a.charAt(q-1)];m[q]=0===w?(m[q+1]<<1|1)&t:(m[q+1]<<1|1)&t|(x[q+1]|x[q])<<1|1|x[q+1];if(m[q]&l&&(t=d(w,q-1),t<=g))if(g=t,h=q-1,h>c)k=Math.max(1,2*c-h);else break}if(d(w+1,c)>g)break;x=m}return h};
-diff_match_patch.prototype.match_alphabet_=function(a){for(var b={},c=0;c<a.length;c++)b[a.charAt(c)]=0;for(c=0;c<a.length;c++)b[a.charAt(c)]|=1<<a.length-c-1;return b};
-diff_match_patch.prototype.patch_addContext_=function(a,b){if(0!=b.length){if(null===a.start2)throw Error("patch not initialized");for(var c=b.substring(a.start2,a.start2+a.length1),d=0;b.indexOf(c)!=b.lastIndexOf(c)&&c.length<this.Match_MaxBits-this.Patch_Margin-this.Patch_Margin;)d+=this.Patch_Margin,c=b.substring(a.start2-d,a.start2+a.length1+d);d+=this.Patch_Margin;(c=b.substring(a.start2-d,a.start2))&&a.diffs.unshift(new diff_match_patch.Diff(DIFF_EQUAL,c));(d=b.substring(a.start2+a.length1,
-a.start2+a.length1+d))&&a.diffs.push(new diff_match_patch.Diff(DIFF_EQUAL,d));a.start1-=c.length;a.start2-=c.length;a.length1+=c.length+d.length;a.length2+=c.length+d.length}};
-diff_match_patch.prototype.patch_make=function(a,b,c){if("string"==typeof a&&"string"==typeof b&&"undefined"==typeof c){var d=a;b=this.diff_main(d,b,!0);2<b.length&&(this.diff_cleanupSemantic(b),this.diff_cleanupEfficiency(b))}else if(a&&"object"==typeof a&&"undefined"==typeof b&&"undefined"==typeof c)b=a,d=this.diff_text1(b);else if("string"==typeof a&&b&&"object"==typeof b&&"undefined"==typeof c)d=a;else if("string"==typeof a&&"string"==typeof b&&c&&"object"==typeof c)d=a,b=c;else throw Error("Unknown call format to patch_make.");
-if(0===b.length)return[];c=[];a=new diff_match_patch.patch_obj;for(var e=0,f=0,g=0,h=d,l=0;l<b.length;l++){var k=b[l][0],m=b[l][1];e||k===DIFF_EQUAL||(a.start1=f,a.start2=g);switch(k){case DIFF_INSERT:a.diffs[e++]=b[l];a.length2+=m.length;d=d.substring(0,g)+m+d.substring(g);break;case DIFF_DELETE:a.length1+=m.length;a.diffs[e++]=b[l];d=d.substring(0,g)+d.substring(g+m.length);break;case DIFF_EQUAL:m.length<=2*this.Patch_Margin&&e&&b.length!=l+1?(a.diffs[e++]=b[l],a.length1+=m.length,a.length2+=m.length):
-m.length>=2*this.Patch_Margin&&e&&(this.patch_addContext_(a,h),c.push(a),a=new diff_match_patch.patch_obj,e=0,h=d,f=g)}k!==DIFF_INSERT&&(f+=m.length);k!==DIFF_DELETE&&(g+=m.length)}e&&(this.patch_addContext_(a,h),c.push(a));return c};
-diff_match_patch.prototype.patch_deepCopy=function(a){for(var b=[],c=0;c<a.length;c++){var d=a[c],e=new diff_match_patch.patch_obj;e.diffs=[];for(var f=0;f<d.diffs.length;f++)e.diffs[f]=new diff_match_patch.Diff(d.diffs[f][0],d.diffs[f][1]);e.start1=d.start1;e.start2=d.start2;e.length1=d.length1;e.length2=d.length2;b[c]=e}return b};
-diff_match_patch.prototype.patch_apply=function(a,b){if(0==a.length)return[b,[]];a=this.patch_deepCopy(a);var c=this.patch_addPadding(a);b=c+b+c;this.patch_splitMax(a);for(var d=0,e=[],f=0;f<a.length;f++){var g=a[f].start2+d,h=this.diff_text1(a[f].diffs),l=-1;if(h.length>this.Match_MaxBits){var k=this.match_main(b,h.substring(0,this.Match_MaxBits),g);-1!=k&&(l=this.match_main(b,h.substring(h.length-this.Match_MaxBits),g+h.length-this.Match_MaxBits),-1==l||k>=l)&&(k=-1)}else k=this.match_main(b,h,
-g);if(-1==k)e[f]=!1,d-=a[f].length2-a[f].length1;else if(e[f]=!0,d=k-g,g=-1==l?b.substring(k,k+h.length):b.substring(k,l+this.Match_MaxBits),h==g)b=b.substring(0,k)+this.diff_text2(a[f].diffs)+b.substring(k+h.length);else if(g=this.diff_main(h,g,!1),h.length>this.Match_MaxBits&&this.diff_levenshtein(g)/h.length>this.Patch_DeleteThreshold)e[f]=!1;else{this.diff_cleanupSemanticLossless(g);h=0;var m;for(l=0;l<a[f].diffs.length;l++){var p=a[f].diffs[l];p[0]!==DIFF_EQUAL&&(m=this.diff_xIndex(g,h));p[0]===
-DIFF_INSERT?b=b.substring(0,k+m)+p[1]+b.substring(k+m):p[0]===DIFF_DELETE&&(b=b.substring(0,k+m)+b.substring(k+this.diff_xIndex(g,h+p[1].length)));p[0]!==DIFF_DELETE&&(h+=p[1].length)}}}b=b.substring(c.length,b.length-c.length);return[b,e]};
-diff_match_patch.prototype.patch_addPadding=function(a){for(var b=this.Patch_Margin,c="",d=1;d<=b;d++)c+=String.fromCharCode(d);for(d=0;d<a.length;d++)a[d].start1+=b,a[d].start2+=b;d=a[0];var e=d.diffs;if(0==e.length||e[0][0]!=DIFF_EQUAL)e.unshift(new diff_match_patch.Diff(DIFF_EQUAL,c)),d.start1-=b,d.start2-=b,d.length1+=b,d.length2+=b;else if(b>e[0][1].length){var f=b-e[0][1].length;e[0][1]=c.substring(e[0][1].length)+e[0][1];d.start1-=f;d.start2-=f;d.length1+=f;d.length2+=f}d=a[a.length-1];e=d.diffs;
-0==e.length||e[e.length-1][0]!=DIFF_EQUAL?(e.push(new diff_match_patch.Diff(DIFF_EQUAL,c)),d.length1+=b,d.length2+=b):b>e[e.length-1][1].length&&(f=b-e[e.length-1][1].length,e[e.length-1][1]+=c.substring(0,f),d.length1+=f,d.length2+=f);return c};
-diff_match_patch.prototype.patch_splitMax=function(a){for(var b=this.Match_MaxBits,c=0;c<a.length;c++)if(!(a[c].length1<=b)){var d=a[c];a.splice(c--,1);for(var e=d.start1,f=d.start2,g="";0!==d.diffs.length;){var h=new diff_match_patch.patch_obj,l=!0;h.start1=e-g.length;h.start2=f-g.length;""!==g&&(h.length1=h.length2=g.length,h.diffs.push(new diff_match_patch.Diff(DIFF_EQUAL,g)));for(;0!==d.diffs.length&&h.length1<b-this.Patch_Margin;){g=d.diffs[0][0];var k=d.diffs[0][1];g===DIFF_INSERT?(h.length2+=
-k.length,f+=k.length,h.diffs.push(d.diffs.shift()),l=!1):g===DIFF_DELETE&&1==h.diffs.length&&h.diffs[0][0]==DIFF_EQUAL&&k.length>2*b?(h.length1+=k.length,e+=k.length,l=!1,h.diffs.push(new diff_match_patch.Diff(g,k)),d.diffs.shift()):(k=k.substring(0,b-h.length1-this.Patch_Margin),h.length1+=k.length,e+=k.length,g===DIFF_EQUAL?(h.length2+=k.length,f+=k.length):l=!1,h.diffs.push(new diff_match_patch.Diff(g,k)),k==d.diffs[0][1]?d.diffs.shift():d.diffs[0][1]=d.diffs[0][1].substring(k.length))}g=this.diff_text2(h.diffs);
-g=g.substring(g.length-this.Patch_Margin);k=this.diff_text1(d.diffs).substring(0,this.Patch_Margin);""!==k&&(h.length1+=k.length,h.length2+=k.length,0!==h.diffs.length&&h.diffs[h.diffs.length-1][0]===DIFF_EQUAL?h.diffs[h.diffs.length-1][1]+=k:h.diffs.push(new diff_match_patch.Diff(DIFF_EQUAL,k)));l||a.splice(++c,0,h)}}};diff_match_patch.prototype.patch_toText=function(a){for(var b=[],c=0;c<a.length;c++)b[c]=a[c];return b.join("")};
-diff_match_patch.prototype.patch_fromText=function(a){var b=[];if(!a)return b;a=a.split("\n");for(var c=0,d=/^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$/;c<a.length;){var e=a[c].match(d);if(!e)throw Error("Invalid patch string: "+a[c]);var f=new diff_match_patch.patch_obj;b.push(f);f.start1=parseInt(e[1],10);""===e[2]?(f.start1--,f.length1=1):"0"==e[2]?f.length1=0:(f.start1--,f.length1=parseInt(e[2],10));f.start2=parseInt(e[3],10);""===e[4]?(f.start2--,f.length2=1):"0"==e[4]?f.length2=0:(f.start2--,f.length2=
-parseInt(e[4],10));for(c++;c<a.length;){e=a[c].charAt(0);try{var g=decodeURI(a[c].substring(1))}catch(h){throw Error("Illegal escape in patch_fromText: "+g);}if("-"==e)f.diffs.push(new diff_match_patch.Diff(DIFF_DELETE,g));else if("+"==e)f.diffs.push(new diff_match_patch.Diff(DIFF_INSERT,g));else if(" "==e)f.diffs.push(new diff_match_patch.Diff(DIFF_EQUAL,g));else if("@"==e)break;else if(""!==e)throw Error('Invalid patch mode "'+e+'" in: '+g);c++}}return b};
+diff_match_patch.prototype.diff_cleanupSemanticLossless=function(a){function b(n,w){if(!n||!w)return 6;var p=n.charAt(n.length-1),r=w.charAt(0),u=p.match(diff_match_patch.nonAlphaNumericRegex_),v=r.match(diff_match_patch.nonAlphaNumericRegex_),q=u&&p.match(diff_match_patch.whitespaceRegex_),t=v&&r.match(diff_match_patch.whitespaceRegex_);p=q&&p.match(diff_match_patch.linebreakRegex_);r=t&&r.match(diff_match_patch.linebreakRegex_);var y=p&&n.match(diff_match_patch.blanklineEndRegex_),x=r&&w.match(diff_match_patch.blanklineStartRegex_);
+return y||x?5:p||r?4:u&&!q&&t?3:q||t?2:u||v?1:0}for(var d=1;d<a.length-1;){if(a[d-1][0]==DIFF_EQUAL&&a[d+1][0]==DIFF_EQUAL){var c=a[d-1][1],e=a[d][1],f=a[d+1][1],g=this.diff_commonSuffix(c,e);if(g){var h=e.substring(e.length-g);c=c.substring(0,c.length-g);e=h+e.substring(0,e.length-g);f=h+f}g=c;h=e;for(var l=f,k=b(c,e)+b(e,f);e.charAt(0)===f.charAt(0);){c+=e.charAt(0);e=e.substring(1)+f.charAt(0);f=f.substring(1);var m=b(c,e)+b(e,f);m>=k&&(k=m,g=c,h=e,l=f)}a[d-1][1]!=g&&(g?a[d-1][1]=g:(a.splice(d-
+1,1),d--),a[d][1]=h,l?a[d+1][1]=l:(a.splice(d+1,1),d--))}d++}};diff_match_patch.nonAlphaNumericRegex_=/[^a-zA-Z0-9]/;diff_match_patch.whitespaceRegex_=/\s/;diff_match_patch.linebreakRegex_=/[\r\n]/;diff_match_patch.blanklineEndRegex_=/\n\r?\n$/;diff_match_patch.blanklineStartRegex_=/^\r?\n\r?\n/;
+diff_match_patch.prototype.diff_cleanupEfficiency=function(a){for(var b=!1,d=[],c=0,e=null,f=0,g=!1,h=!1,l=!1,k=!1;f<a.length;)a[f][0]==DIFF_EQUAL?(a[f][1].length<this.Diff_EditCost&&(l||k)?(d[c++]=f,g=l,h=k,e=a[f][1]):(c=0,e=null),l=k=!1):(a[f][0]==DIFF_DELETE?k=!0:l=!0,e&&(g&&h&&l&&k||e.length<this.Diff_EditCost/2&&3==g+h+l+k)&&(a.splice(d[c-1],0,new diff_match_patch.Diff(DIFF_DELETE,e)),a[d[c-1]+1][0]=DIFF_INSERT,c--,e=null,g&&h?(l=k=!0,c=0):(c--,f=0<c?d[c-1]:-1,l=k=!1),b=!0)),f++;b&&this.diff_cleanupMerge(a)};
+diff_match_patch.prototype.diff_cleanupMerge=function(a){a.push(new diff_match_patch.Diff(DIFF_EQUAL,""));for(var b=0,d=0,c=0,e="",f="",g;b<a.length;)switch(a[b][0]){case DIFF_INSERT:c++;f+=a[b][1];b++;break;case DIFF_DELETE:d++;e+=a[b][1];b++;break;case DIFF_EQUAL:1<d+c?(0!==d&&0!==c&&(g=this.diff_commonPrefix(f,e),0!==g&&(0<b-d-c&&a[b-d-c-1][0]==DIFF_EQUAL?a[b-d-c-1][1]+=f.substring(0,g):(a.splice(0,0,new diff_match_patch.Diff(DIFF_EQUAL,f.substring(0,g))),b++),f=f.substring(g),e=e.substring(g)),
+g=this.diff_commonSuffix(f,e),0!==g&&(a[b][1]=f.substring(f.length-g)+a[b][1],f=f.substring(0,f.length-g),e=e.substring(0,e.length-g))),b-=d+c,a.splice(b,d+c),e.length&&(a.splice(b,0,new diff_match_patch.Diff(DIFF_DELETE,e)),b++),f.length&&(a.splice(b,0,new diff_match_patch.Diff(DIFF_INSERT,f)),b++),b++):0!==b&&a[b-1][0]==DIFF_EQUAL?(a[b-1][1]+=a[b][1],a.splice(b,1)):b++,d=c=0,f=e=""}""===a[a.length-1][1]&&a.pop();d=!1;for(b=1;b<a.length-1;)a[b-1][0]==DIFF_EQUAL&&a[b+1][0]==DIFF_EQUAL&&(a[b][1].substring(a[b][1].length-
+a[b-1][1].length)==a[b-1][1]?(a[b][1]=a[b-1][1]+a[b][1].substring(0,a[b][1].length-a[b-1][1].length),a[b+1][1]=a[b-1][1]+a[b+1][1],a.splice(b-1,1),d=!0):a[b][1].substring(0,a[b+1][1].length)==a[b+1][1]&&(a[b-1][1]+=a[b+1][1],a[b][1]=a[b][1].substring(a[b+1][1].length)+a[b+1][1],a.splice(b+1,1),d=!0)),b++;d&&this.diff_cleanupMerge(a)};
+diff_match_patch.prototype.diff_cleanupSplitSurrogates=function(a){for(var b,d=0;d<a.length;d++){var c=a[d],e=c[1][0],f=c[1][c[1].length-1];0===c[1].length?a.splice(d--,1):(f&&this.isHighSurrogate(f)&&(b=f,c[1]=c[1].slice(0,-1)),b&&e&&this.isHighSurrogate(b)&&this.isLowSurrogate(e)&&(c[1]=b+c[1]),0===c[1].length&&a.splice(d--,1))}return a};diff_match_patch.prototype.isHighSurrogate=function(a){a=a.charCodeAt(0);return 55296<=a&&56319>=a};
+diff_match_patch.prototype.isLowSurrogate=function(a){a=a.charCodeAt(0);return 56320<=a&&57343>=a};
+diff_match_patch.prototype.digit16=function(a){switch(a){case "0":return 0;case "1":return 1;case "2":return 2;case "3":return 3;case "4":return 4;case "5":return 5;case "6":return 6;case "7":return 7;case "8":return 8;case "9":return 9;case "A":case "a":return 10;case "B":case "b":return 11;case "C":case "c":return 12;case "D":case "d":return 13;case "E":case "e":return 14;case "F":case "f":return 15;default:throw Error("Invalid hex-code");}};
+diff_match_patch.prototype.decodeURI=function(a){try{return decodeURI(a)}catch(h){for(var b=0,d="";b<a.length;)if("%"!==a[b])d+=a[b++];else{var c=(this.digit16(a[b+1])<<4)+this.digit16(a[b+2]);if(0===(c&128))d+=String.fromCharCode(c),b+=3;else{if("%"!==a[b+3])throw new URIError("URI malformed");var e=(this.digit16(a[b+4])<<4)+this.digit16(a[b+5]);if(128!==(e&192))throw new URIError("URI malformed");e&=63;if(192===(c&224))d+=String.fromCharCode((c&31)<<6|e),b+=6;else{if("%"!==a[b+6])throw new URIError("URI malformed");
+var f=(this.digit16(a[b+7])<<4)+this.digit16(a[b+8]);if(128!==(f&192))throw new URIError("URI malformed");f&=63;if(224===(c&240))d+=String.fromCharCode((c&15)<<12|e<<6|f),b+=9;else{if("%"!==a[b+9])throw new URIError("URI malformed");var g=(this.digit16(a[b+10])<<4)+this.digit16(a[b+11]);if(128!==(g&192))throw new URIError("URI malformed");g&=63;if(240===(c&248)&&(c=(c&7)<<18|e<<12|f<<6|g,65536<=c&&1114111>=c)){d+=String.fromCharCode((c&65535)>>>10&1023|55296);d+=String.fromCharCode(56320|c&1023);
+b+=12;continue}throw new URIError("URI malformed");}}}}return d}};diff_match_patch.prototype.diff_xIndex=function(a,b){var d=0,c=0,e=0,f=0,g;for(g=0;g<a.length;g++){a[g][0]!==DIFF_INSERT&&(d+=a[g][1].length);a[g][0]!==DIFF_DELETE&&(c+=a[g][1].length);if(d>b)break;e=d;f=c}return a.length!=g&&a[g][0]===DIFF_DELETE?f:f+(b-e)};
+diff_match_patch.prototype.diff_prettyHtml=function(a){a=this.diff_cleanupSplitSurrogates(a);for(var b=[],d=/&/g,c=/</g,e=/>/g,f=/\n/g,g=0;g<a.length;g++){var h=a[g][0],l=a[g][1].replace(d,"&amp;").replace(c,"&lt;").replace(e,"&gt;").replace(f,"&para;<br>");switch(h){case DIFF_INSERT:b[g]='<ins style="background:#e6ffe6;">'+l+"</ins>";break;case DIFF_DELETE:b[g]='<del style="background:#ffe6e6;">'+l+"</del>";break;case DIFF_EQUAL:b[g]="<span>"+l+"</span>"}}return b.join("")};
+diff_match_patch.prototype.diff_text1=function(a){for(var b=[],d=0;d<a.length;d++)a[d][0]!==DIFF_INSERT&&(b[d]=a[d][1]);return b.join("")};diff_match_patch.prototype.diff_text2=function(a){for(var b=[],d=0;d<a.length;d++)a[d][0]!==DIFF_DELETE&&(b[d]=a[d][1]);return b.join("")};
+diff_match_patch.prototype.diff_levenshtein=function(a){for(var b=0,d=0,c=0,e=0;e<a.length;e++){var f=a[e][1];switch(a[e][0]){case DIFF_INSERT:d+=f.length;break;case DIFF_DELETE:c+=f.length;break;case DIFF_EQUAL:b+=Math.max(d,c),c=d=0}}return b+=Math.max(d,c)};
+diff_match_patch.prototype.diff_toDelta=function(a){a=this.diff_cleanupSplitSurrogates(a);for(var b=[],d=0;d<a.length;d++)switch(a[d][0]){case DIFF_INSERT:b[d]="+"+encodeURI(a[d][1]);break;case DIFF_DELETE:b[d]="-"+a[d][1].length;break;case DIFF_EQUAL:b[d]="="+a[d][1].length}return b.join("\t").replace(/%20/g," ")};
+diff_match_patch.prototype.diff_fromDelta=function(a,b){for(var d=[],c=0,e=0,f=b.split(/\t/g),g=0;g<f.length;g++){var h=f[g].substring(1);switch(f[g].charAt(0)){case "+":try{d[c++]=new diff_match_patch.Diff(DIFF_INSERT,this.decodeURI(h))}catch(k){throw Error("Illegal escape in diff_fromDelta: "+h);}break;case "-":case "=":var l=parseInt(h,10);if(isNaN(l)||0>l)throw Error("Invalid number in diff_fromDelta: "+h);h=a.substring(e,e+=l);"="==f[g].charAt(0)?d[c++]=new diff_match_patch.Diff(DIFF_EQUAL,h):
+d[c++]=new diff_match_patch.Diff(DIFF_DELETE,h);break;default:if(f[g])throw Error("Invalid diff operation in diff_fromDelta: "+f[g]);}}if(e!=a.length)throw Error("Delta length ("+e+") does not equal source text length ("+a.length+").");return d};diff_match_patch.prototype.match_main=function(a,b,d){if(null==a||null==b||null==d)throw Error("Null input. (match_main)");d=Math.max(0,Math.min(d,a.length));return a==b?0:a.length?a.substring(d,d+b.length)==b?d:this.match_bitap_(a,b,d):-1};
+diff_match_patch.prototype.match_bitap_=function(a,b,d){function c(v,q){var t=v/b.length,y=Math.abs(d-q);return f.Match_Distance?t+y/f.Match_Distance:y?1:t}if(b.length>this.Match_MaxBits)throw Error("Pattern too long for this browser.");var e=this.match_alphabet_(b),f=this,g=this.Match_Threshold,h=a.indexOf(b,d);-1!=h&&(g=Math.min(c(0,h),g),h=a.lastIndexOf(b,d+b.length),-1!=h&&(g=Math.min(c(0,h),g)));var l=1<<b.length-1;h=-1;for(var k,m,n=b.length+a.length,w,p=0;p<b.length;p++){k=0;for(m=n;k<m;)c(p,
+d+m)<=g?k=m:n=m,m=Math.floor((n-k)/2+k);n=m;k=Math.max(1,d-m+1);var r=Math.min(d+m,a.length)+b.length;m=Array(r+2);for(m[r+1]=(1<<p)-1;r>=k;r--){var u=e[a.charAt(r-1)];m[r]=0===p?(m[r+1]<<1|1)&u:(m[r+1]<<1|1)&u|(w[r+1]|w[r])<<1|1|w[r+1];if(m[r]&l&&(u=c(p,r-1),u<=g))if(g=u,h=r-1,h>d)k=Math.max(1,2*d-h);else break}if(c(p+1,d)>g)break;w=m}return h};
+diff_match_patch.prototype.match_alphabet_=function(a){for(var b={},d=0;d<a.length;d++)b[a.charAt(d)]=0;for(d=0;d<a.length;d++)b[a.charAt(d)]|=1<<a.length-d-1;return b};
+diff_match_patch.prototype.patch_addContext_=function(a,b){if(0!=b.length){if(null===a.start2)throw Error("patch not initialized");for(var d=b.substring(a.start2,a.start2+a.length1),c=0;b.indexOf(d)!=b.lastIndexOf(d)&&d.length<this.Match_MaxBits-this.Patch_Margin-this.Patch_Margin;)c+=this.Patch_Margin,d=b.substring(a.start2-c,a.start2+a.length1+c);c+=this.Patch_Margin;0<a.start2-c&&diff_match_patch.prototype.isLowSurrogate(b[a.start2-c])&&c++;(d=b.substring(a.start2-c,a.start2))&&a.diffs.unshift(new diff_match_patch.Diff(DIFF_EQUAL,
+d));a.start2+a.length1+c<b.length&&diff_match_patch.prototype.isHighSurrogate(b[a.start2+a.length1+c])&&c++;(c=b.substring(a.start2+a.length1,a.start2+a.length1+c))&&a.diffs.push(new diff_match_patch.Diff(DIFF_EQUAL,c));a.start1-=d.length;a.start2-=d.length;a.length1+=d.length+c.length;a.length2+=d.length+c.length}};
+diff_match_patch.prototype.patch_make=function(a,b,d){if("string"==typeof a&&"string"==typeof b&&"undefined"==typeof d){var c=a;b=this.diff_main(c,b,!0);2<b.length&&(this.diff_cleanupSemantic(b),this.diff_cleanupEfficiency(b))}else if(a&&"object"==typeof a&&"undefined"==typeof b&&"undefined"==typeof d)b=a,c=this.diff_text1(b);else if("string"==typeof a&&b&&"object"==typeof b&&"undefined"==typeof d)c=a;else if("string"==typeof a&&"string"==typeof b&&d&&"object"==typeof d)c=a,b=d;else throw Error("Unknown call format to patch_make.");
+if(0===b.length)return[];b=this.diff_cleanupSplitSurrogates(b);d=[];a=new diff_match_patch.patch_obj;for(var e=0,f=0,g=0,h=c,l=0;l<b.length;l++){var k=b[l][0],m=b[l][1];e||k===DIFF_EQUAL||(a.start1=f,a.start2=g);switch(k){case DIFF_INSERT:a.diffs[e++]=b[l];a.length2+=m.length;c=c.substring(0,g)+m+c.substring(g);break;case DIFF_DELETE:a.length1+=m.length;a.diffs[e++]=b[l];c=c.substring(0,g)+c.substring(g+m.length);break;case DIFF_EQUAL:m.length<=2*this.Patch_Margin&&e&&b.length!=l+1?(a.diffs[e++]=
+b[l],a.length1+=m.length,a.length2+=m.length):m.length>=2*this.Patch_Margin&&e&&(this.patch_addContext_(a,h),d.push(a),a=new diff_match_patch.patch_obj,e=0,h=c,f=g)}k!==DIFF_INSERT&&(f+=m.length);k!==DIFF_DELETE&&(g+=m.length)}e&&(this.patch_addContext_(a,h),d.push(a));return d};
+diff_match_patch.prototype.patch_deepCopy=function(a){for(var b=[],d=0;d<a.length;d++){var c=a[d],e=new diff_match_patch.patch_obj;e.diffs=[];for(var f=0;f<c.diffs.length;f++)e.diffs[f]=new diff_match_patch.Diff(c.diffs[f][0],c.diffs[f][1]);e.start1=c.start1;e.start2=c.start2;e.length1=c.length1;e.length2=c.length2;b[d]=e}return b};
+diff_match_patch.prototype.patch_apply=function(a,b){if(0==a.length)return[b,[]];a=this.patch_deepCopy(a);var d=this.patch_addPadding(a);b=d+b+d;this.patch_splitMax(a);for(var c=0,e=[],f=0;f<a.length;f++){var g=a[f].start2+c,h=this.diff_text1(a[f].diffs),l=-1;if(h.length>this.Match_MaxBits){var k=this.match_main(b,h.substring(0,this.Match_MaxBits),g);-1!=k&&(l=this.match_main(b,h.substring(h.length-this.Match_MaxBits),g+h.length-this.Match_MaxBits),-1==l||k>=l)&&(k=-1)}else k=this.match_main(b,h,
+g);if(-1==k)e[f]=!1,c-=a[f].length2-a[f].length1;else if(e[f]=!0,c=k-g,g=-1==l?b.substring(k,k+h.length):b.substring(k,l+this.Match_MaxBits),h==g)b=b.substring(0,k)+this.diff_text2(a[f].diffs)+b.substring(k+h.length);else if(g=this.diff_main(h,g,!1),h.length>this.Match_MaxBits&&this.diff_levenshtein(g)/h.length>this.Patch_DeleteThreshold)e[f]=!1;else{this.diff_cleanupSemanticLossless(g);h=0;var m;for(l=0;l<a[f].diffs.length;l++){var n=a[f].diffs[l];n[0]!==DIFF_EQUAL&&(m=this.diff_xIndex(g,h));n[0]===
+DIFF_INSERT?b=b.substring(0,k+m)+n[1]+b.substring(k+m):n[0]===DIFF_DELETE&&(b=b.substring(0,k+m)+b.substring(k+this.diff_xIndex(g,h+n[1].length)));n[0]!==DIFF_DELETE&&(h+=n[1].length)}}}b=b.substring(d.length,b.length-d.length);return[b,e]};
+diff_match_patch.prototype.patch_addPadding=function(a){for(var b=this.Patch_Margin,d="",c=1;c<=b;c++)d+=String.fromCharCode(c);for(c=0;c<a.length;c++)a[c].start1+=b,a[c].start2+=b;c=a[0];var e=c.diffs;if(0==e.length||e[0][0]!=DIFF_EQUAL)e.unshift(new diff_match_patch.Diff(DIFF_EQUAL,d)),c.start1-=b,c.start2-=b,c.length1+=b,c.length2+=b;else if(b>e[0][1].length){var f=b-e[0][1].length;e[0][1]=d.substring(e[0][1].length)+e[0][1];c.start1-=f;c.start2-=f;c.length1+=f;c.length2+=f}c=a[a.length-1];e=c.diffs;
+0==e.length||e[e.length-1][0]!=DIFF_EQUAL?(e.push(new diff_match_patch.Diff(DIFF_EQUAL,d)),c.length1+=b,c.length2+=b):b>e[e.length-1][1].length&&(f=b-e[e.length-1][1].length,e[e.length-1][1]+=d.substring(0,f),c.length1+=f,c.length2+=f);return d};
+diff_match_patch.prototype.patch_splitMax=function(a){for(var b=this.Match_MaxBits,d=0;d<a.length;d++)if(!(a[d].length1<=b)){var c=a[d];a.splice(d--,1);for(var e=c.start1,f=c.start2,g="";0!==c.diffs.length;){var h=new diff_match_patch.patch_obj,l=!0;h.start1=e-g.length;h.start2=f-g.length;""!==g&&(h.length1=h.length2=g.length,h.diffs.push(new diff_match_patch.Diff(DIFF_EQUAL,g)));for(;0!==c.diffs.length&&h.length1<b-this.Patch_Margin;){g=c.diffs[0][0];var k=c.diffs[0][1];g===DIFF_INSERT?(h.length2+=
+k.length,f+=k.length,h.diffs.push(c.diffs.shift()),l=!1):g===DIFF_DELETE&&1==h.diffs.length&&h.diffs[0][0]==DIFF_EQUAL&&k.length>2*b?(h.length1+=k.length,e+=k.length,l=!1,h.diffs.push(new diff_match_patch.Diff(g,k)),c.diffs.shift()):(k=k.substring(0,b-h.length1-this.Patch_Margin),h.length1+=k.length,e+=k.length,g===DIFF_EQUAL?(h.length2+=k.length,f+=k.length):l=!1,h.diffs.push(new diff_match_patch.Diff(g,k)),k==c.diffs[0][1]?c.diffs.shift():c.diffs[0][1]=c.diffs[0][1].substring(k.length))}g=this.diff_text2(h.diffs);
+g=g.substring(g.length-this.Patch_Margin);k=this.diff_text1(c.diffs).substring(0,this.Patch_Margin);""!==k&&(h.length1+=k.length,h.length2+=k.length,0!==h.diffs.length&&h.diffs[h.diffs.length-1][0]===DIFF_EQUAL?h.diffs[h.diffs.length-1][1]+=k:h.diffs.push(new diff_match_patch.Diff(DIFF_EQUAL,k)));l||a.splice(++d,0,h)}}};diff_match_patch.prototype.patch_toText=function(a){for(var b=[],d=0;d<a.length;d++)b[d]=a[d];return b.join("")};
+diff_match_patch.prototype.patch_fromText=function(a){var b=[];if(!a)return b;a=a.split("\n");for(var d=0,c=/^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$/;d<a.length;){var e=a[d].match(c);if(!e)throw Error("Invalid patch string: "+a[d]);var f=new diff_match_patch.patch_obj;b.push(f);f.start1=parseInt(e[1],10);""===e[2]?(f.start1--,f.length1=1):"0"==e[2]?f.length1=0:(f.start1--,f.length1=parseInt(e[2],10));f.start2=parseInt(e[3],10);""===e[4]?(f.start2--,f.length2=1):"0"==e[4]?f.length2=0:(f.start2--,f.length2=
+parseInt(e[4],10));for(d++;d<a.length;){e=a[d].charAt(0);try{var g=decodeURI(a[d].substring(1))}catch(h){throw Error("Illegal escape in patch_fromText: "+g);}if("-"==e)f.diffs.push(new diff_match_patch.Diff(DIFF_DELETE,g));else if("+"==e)f.diffs.push(new diff_match_patch.Diff(DIFF_INSERT,g));else if(" "==e)f.diffs.push(new diff_match_patch.Diff(DIFF_EQUAL,g));else if("@"==e)break;else if(""!==e)throw Error('Invalid patch mode "'+e+'" in: '+g);d++}}return b};
 diff_match_patch.patch_obj=function(){this.diffs=[];this.start2=this.start1=null;this.length2=this.length1=0};
-diff_match_patch.patch_obj.prototype.toString=function(){for(var a=["@@ -"+(0===this.length1?this.start1+",0":1==this.length1?this.start1+1:this.start1+1+","+this.length1)+" +"+(0===this.length2?this.start2+",0":1==this.length2?this.start2+1:this.start2+1+","+this.length2)+" @@\n"],b,c=0;c<this.diffs.length;c++){switch(this.diffs[c][0]){case DIFF_INSERT:b="+";break;case DIFF_DELETE:b="-";break;case DIFF_EQUAL:b=" "}a[c+1]=b+encodeURI(this.diffs[c][1])+"\n"}return a.join("").replace(/%20/g," ")};
-this.diff_match_patch=diff_match_patch;this.DIFF_DELETE=DIFF_DELETE;this.DIFF_INSERT=DIFF_INSERT;this.DIFF_EQUAL=DIFF_EQUAL;
+diff_match_patch.patch_obj.prototype.toString=function(){var a=["@@ -"+(0===this.length1?this.start1+",0":1==this.length1?this.start1+1:this.start1+1+","+this.length1)+" +"+(0===this.length2?this.start2+",0":1==this.length2?this.start2+1:this.start2+1+","+this.length2)+" @@\n"];diff_match_patch.prototype.diff_cleanupSplitSurrogates(this.diffs);for(var b=0;b<this.diffs.length;b++){switch(this.diffs[b][0]){case DIFF_INSERT:var d="+";break;case DIFF_DELETE:d="-";break;case DIFF_EQUAL:d=" "}a[b+1]=d+
+encodeURI(this.diffs[b][1])+"\n"}return a.join("").replace(/%20/g," ")};this.diff_match_patch=diff_match_patch;this.DIFF_DELETE=DIFF_DELETE;this.DIFF_INSERT=DIFF_INSERT;this.DIFF_EQUAL=DIFF_EQUAL;
\ No newline at end of file
diff --git a/javascript/diff_match_patch_uncompressed.js b/javascript/diff_match_patch_uncompressed.js
index 88a702c2..a9b41d66 100644
--- a/javascript/diff_match_patch_uncompressed.js
+++ b/javascript/diff_match_patch_uncompressed.js
@@ -1206,6 +1206,166 @@ diff_match_patch.prototype.diff_cleanupMerge = function(diffs) {
   }
 };
 
+/**
+ * Rearrange diff boundaries that split Unicode surrogate pairs.
+ * @param {!Array.<!diff_match_patch.Diff>} diffs Array of diff tuples.
+ * @return {!Array.<!diff_match_patch.Diff>} The same array, modified in place.
+ */
+diff_match_patch.prototype.diff_cleanupSplitSurrogates = function(diffs) {
+  var lastEnd;
+  for (var x = 0; x < diffs.length; x++) {
+    var thisDiff = diffs[x];
+    var thisTop  = thisDiff[1][0];
+    var thisEnd  = thisDiff[1][thisDiff[1].length - 1];
+
+    if (0 === thisDiff[1].length) {
+      diffs.splice(x--, 1);
+      continue;
+    }
+
+    if (thisEnd && this.isHighSurrogate(thisEnd)) {
+      lastEnd = thisEnd;
+      thisDiff[1] = thisDiff[1].slice(0, -1);
+    }
+
+    if (lastEnd && thisTop && this.isHighSurrogate(lastEnd) && this.isLowSurrogate(thisTop)) {
+      thisDiff[1] = lastEnd + thisDiff[1];
+    }
+
+    if (0 === thisDiff[1].length) {
+      diffs.splice(x--, 1);
+      continue;
+    }
+  }
+
+  return diffs;
+};
+
+diff_match_patch.prototype.isHighSurrogate = function(c) {
+  var v = c.charCodeAt(0);
+  return v >= 0xD800 && v <= 0xDBFF;
+};
+
+diff_match_patch.prototype.isLowSurrogate = function(c) {
+  var v = c.charCodeAt(0);
+  return v >= 0xDC00 && v <= 0xDFFF;
+};
+
+diff_match_patch.prototype.digit16 = function(c) {
+  switch (c) {
+    case '0': return 0;
+    case '1': return 1;
+    case '2': return 2;
+    case '3': return 3;
+    case '4': return 4;
+    case '5': return 5;
+    case '6': return 6;
+    case '7': return 7;
+    case '8': return 8;
+    case '9': return 9;
+    case 'A': case 'a': return 10;
+    case 'B': case 'b': return 11;
+    case 'C': case 'c': return 12;
+    case 'D': case 'd': return 13;
+    case 'E': case 'e': return 14;
+    case 'F': case 'f': return 15;
+    default: throw new Error('Invalid hex-code');
+  }
+};
+
+/**
+ * Decode a URI-encoded string, tolerating UTF-8-encoded surrogate halves.
+ *
+ * Native decodeURI() is tried first; only when it throws is the text decoded
+ * by hand, accepting three-byte sequences that encode a lone surrogate. That
+ * relaxation is needed because not all libraries and versions produce valid
+ * URI strings in toDelta. The hand-written path decodes every %XX escape.
+ *
+ * @example: decodeURI( 'abcd: %F0%9F%85%B0' ) = 'abcd: \ud83c\udd70'
+ * @example: decodeURI( 'abcd: %ED%A0%BC' ) = 'abcd: \ud83c'
+ *
+ * @cite: @mathiasbynens utf8.js at https://github.com/mathiasbynens/utf8.js
+ *
+ * @param {String} text input string encoded by encodeURI() or equivalent
+ * @return {String} decoded text; an Error is thrown on a malformed escape
+ */
+diff_match_patch.prototype.decodeURI = function(text) {
+  try {
+    return decodeURI(text);
+  } catch ( e ) {
+    var i = 0;
+    var decoded = '';
+
+    while (i < text.length) {
+      if ( text[i] !== '%' ) {
+        decoded += text[i++];
+        continue;
+      }
+
+      // Start a percent-sequence; digit16() throws if the hex digits are missing.
+      var byte1 = (this.digit16(text[i + 1]) << 4) + this.digit16(text[i + 2]);
+      if ((byte1 & 0x80) === 0) {
+        decoded += String.fromCharCode(byte1);
+        i += 3;
+        continue;
+      }
+
+      if ('%' !== text[i + 3]) {
+        throw new URIError('URI malformed');
+      }
+
+      var byte2 = (this.digit16(text[i + 4]) << 4) + this.digit16(text[i + 5]);
+      if ((byte2 & 0xC0) !== 0x80) {
+        throw new URIError('URI malformed');
+      }
+      byte2 = byte2 & 0x3F;
+      if ((byte1 & 0xE0) === 0xC0) {
+        decoded += String.fromCharCode(((byte1 & 0x1F) << 6) | byte2);
+        i += 6;
+        continue;
+      }
+
+      if ('%' !== text[i + 6]) {
+        throw new URIError('URI malformed');
+      }
+
+      var byte3 = (this.digit16(text[i + 7]) << 4) + this.digit16(text[i + 8]);
+      if ((byte3 & 0xC0) !== 0x80) {
+        throw new URIError('URI malformed');
+      }
+      byte3 = byte3 & 0x3F;
+      if ((byte1 & 0xF0) === 0xE0) {
+        // Unpaired surrogates are fine here.
+        decoded += String.fromCharCode(((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3);
+        i += 9;
+        continue;
+      }
+
+      if ('%' !== text[i + 9]) {
+        throw new URIError('URI malformed');
+      }
+
+      var byte4 = (this.digit16(text[i + 10]) << 4) + this.digit16(text[i + 11]);
+      if ((byte4 & 0xC0) !== 0x80) {
+        throw new URIError('URI malformed');
+      }
+      byte4 = byte4 & 0x3F;
+      if ((byte1 & 0xF8) === 0xF0) {
+        var codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) | (byte3 << 0x06) | byte4;
+        if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) {
+          // The pair encodes the offset above U+10000, not the low 16 bits of the code point.
+          decoded += String.fromCharCode(0xD800 | ((codePoint - 0x10000) >>> 10), 0xDC00 | (codePoint & 0x3FF));
+          i += 12;
+          continue;
+        }
+      }
+
+      throw new URIError('URI malformed');
+    }
+
+    return decoded;
+  }
+};
 
 /**
  * loc is a location in text1, compute and return the equivalent location in
@@ -1249,6 +1409,7 @@ diff_match_patch.prototype.diff_xIndex = function(diffs, loc) {
  * @return {string} HTML representation.
  */
 diff_match_patch.prototype.diff_prettyHtml = function(diffs) {
+  diffs = this.diff_cleanupSplitSurrogates(diffs);
   var html = [];
   var pattern_amp = /&/g;
   var pattern_lt = /</g;
@@ -1349,6 +1510,7 @@ diff_match_patch.prototype.diff_levenshtein = function(diffs) {
  * @return {string} Delta text.
  */
 diff_match_patch.prototype.diff_toDelta = function(diffs) {
+  diffs = this.diff_cleanupSplitSurrogates(diffs);
   var text = [];
   for (var x = 0; x < diffs.length; x++) {
     switch (diffs[x][0]) {
@@ -1388,7 +1550,7 @@ diff_match_patch.prototype.diff_fromDelta = function(text1, delta) {
       case '+':
         try {
           diffs[diffsLength++] =
-              new diff_match_patch.Diff(DIFF_INSERT, decodeURI(param));
+              new diff_match_patch.Diff(DIFF_INSERT, this.decodeURI(param));
         } catch (ex) {
           // Malformed URI sequence.
           throw new Error('Illegal escape in diff_fromDelta: ' + param);
@@ -1627,11 +1789,23 @@ diff_match_patch.prototype.patch_addContext_ = function(patch, text) {
   padding += this.Patch_Margin;
 
   // Add the prefix.
+  if (
+    patch.start2 - padding > 0 &&
+    diff_match_patch.prototype.isLowSurrogate(text[patch.start2 - padding])
+  ) {
+    padding++;
+  }
   var prefix = text.substring(patch.start2 - padding, patch.start2);
   if (prefix) {
     patch.diffs.unshift(new diff_match_patch.Diff(DIFF_EQUAL, prefix));
   }
   // Add the suffix.
+  if (
+    patch.start2 + patch.length1 + padding < text.length &&
+    diff_match_patch.prototype.isHighSurrogate(text[patch.start2 + patch.length1 + padding])
+  ) {
+    padding++;
+  }
   var suffix = text.substring(patch.start2 + patch.length1,
                               patch.start2 + patch.length1 + padding);
   if (suffix) {
@@ -1705,6 +1879,7 @@ diff_match_patch.prototype.patch_make = function(a, opt_b, opt_c) {
   if (diffs.length === 0) {
     return [];  // Get rid of the null case.
   }
+  diffs = this.diff_cleanupSplitSurrogates(diffs);
   var patches = [];
   var patch = new diff_match_patch.patch_obj();
   var patchDiffLength = 0;  // Keeping our own length var is faster in JS.
@@ -2202,6 +2377,7 @@ diff_match_patch.patch_obj.prototype.toString = function() {
   var text = ['@@ -' + coords1 + ' +' + coords2 + ' @@\n'];
   var op;
   // Escape the body of the patch with %xx notation.
+  diff_match_patch.prototype.diff_cleanupSplitSurrogates(this.diffs);
   for (var x = 0; x < this.diffs.length; x++) {
     switch (this.diffs[x][0]) {
       case DIFF_INSERT:
diff --git a/javascript/tests/diff_match_patch_test.js b/javascript/tests/diff_match_patch_test.js
index 109e56ad..99aaca13 100644
--- a/javascript/tests/diff_match_patch_test.js
+++ b/javascript/tests/diff_match_patch_test.js
@@ -492,6 +492,136 @@ function testDiffDelta() {
   // Convert delta string into a diff.
   assertEquivalent(diffs, dmp.diff_fromDelta(text1, delta));
 
+  diffs = [[DIFF_EQUAL, '\ud83d\ude4b\ud83d'], [DIFF_INSERT, '\ude4c\ud83d'], [DIFF_EQUAL, '\ude4b']];
+  try {
+    delta = dmp.diff_toDelta(diffs);
+    assertEquals('=2\t+%F0%9F%99%8C\t=2', delta);
+  } catch ( e ) {
+    assertEquals(false, true);
+  }
+
+  (function(){
+    const originalText = `U+1F17x	🅰️	🅱️		🅾️	🅿️ safhawifhkw
+    U+1F18x															🆎	
+    0	1	2	3	4	5	6	7	8	9	A	B	C	D	E	F
+    U+1F19x		🆑	🆒	🆓	🆔	🆕	🆖	🆗	🆘	🆙	🆚					
+    U+1F20x		🈁	🈂️							sfss.,_||saavvvbbds						
+    U+1F21x	🈚					
+    U+1F22x			🈯
+    U+1F23x			🈲	🈳	🈴	🈵	🈶	🈷️	🈸	🈹	🈺					
+    U+1F25x	🉐	🉑		
+    U+1F30x	🌀	🌁	🌂	🌃	🌄	🌅	🌆	🌇	🌈	🌉	🌊	🌋	🌌	🌍	🌎	🌏
+    U+1F31x	🌐	🌑	🌒	🌓	🌔	🌕	🌖	🌗	🌘	🌙	🌚	🌛	🌜	🌝	🌞	`;
+
+    // applies some random edits to string and returns new, edited string
+    function applyRandomTextEdit(text) {
+      let textArr = [...text];
+      let r = Math.random();
+      if(r < 1/3) { // swap
+      let swapCount = Math.floor(Math.random()*5);
+        for(let i = 0; i < swapCount; i++) {
+        let swapPos1 = Math.floor(Math.random()*textArr.length);
+          let swapPos2 = Math.floor(Math.random()*textArr.length);
+          let char1 = textArr[swapPos1];
+          let char2 = textArr[swapPos2];
+          textArr[swapPos1] = char2;
+          textArr[swapPos2] = char1;
+        }
+      } else if(r < 2/3) { // remove
+        let removeCount = Math.floor(Math.random()*5);
+        for(let i = 0; i < removeCount; i++) {
+          let removePos = Math.floor(Math.random()*textArr.length);
+          textArr[removePos] = "";
+        }
+      } else { // add
+        let addCount = Math.floor(Math.random()*5);
+        for(let i = 0; i < addCount; i++) {
+          let addPos = Math.floor(Math.random()*textArr.length);
+          let addFromPos = Math.floor(Math.random()*textArr.length);
+          textArr[addPos] = textArr[addPos] + textArr[addFromPos];
+        }
+      }
+      return textArr.join("");
+    }
+
+    for(let i = 0; i < 1000; i++) {
+      newText = applyRandomTextEdit(originalText);
+      dmp.diff_toDelta(dmp.diff_main(originalText, newText));
+    }
+  })();
+
+  // Unicode - splitting surrogates
+  try {
+    assertEquivalent(
+      dmp.diff_toDelta([[DIFF_INSERT,'\ud83c\udd71'], [DIFF_EQUAL, '\ud83c\udd70\ud83c\udd71']]),
+      dmp.diff_toDelta(dmp.diff_main('\ud83c\udd70\ud83c\udd71', '\ud83c\udd71\ud83c\udd70\ud83c\udd71'))
+    );
+  } catch ( e ) {
+    assertEquals('Inserting similar surrogate pair at beginning', 'crashed');
+  }
+
+  try {
+    assertEquivalent(
+      dmp.diff_toDelta([[DIFF_EQUAL,'\ud83c\udd70'], [DIFF_INSERT, '\ud83c\udd70'], [DIFF_EQUAL, '\ud83c\udd71']]),
+      dmp.diff_toDelta(dmp.diff_main('\ud83c\udd70\ud83c\udd71', '\ud83c\udd70\ud83c\udd70\ud83c\udd71'))
+    );
+  } catch ( e ) {
+    assertEquals('Inserting similar surrogate pair in the middle', 'crashed');
+  }
+
+  try {
+    assertEquivalent(
+      dmp.diff_toDelta([[DIFF_DELETE,'\ud83c\udd71'], [DIFF_EQUAL, '\ud83c\udd70\ud83c\udd71']]),
+      dmp.diff_toDelta(dmp.diff_main('\ud83c\udd71\ud83c\udd70\ud83c\udd71', '\ud83c\udd70\ud83c\udd71'))
+    );
+  } catch ( e ) {
+    assertEquals('Deleting similar surrogate pair at the beginning', 'crashed');
+  }
+
+  try {
+    assertEquivalent(
+      dmp.diff_toDelta([[DIFF_EQUAL, '\ud83c\udd70'], [DIFF_DELETE,'\ud83c\udd72'], [DIFF_EQUAL, '\ud83c\udd71']]),
+      dmp.diff_toDelta(dmp.diff_main('\ud83c\udd70\ud83c\udd72\ud83c\udd71', '\ud83c\udd70\ud83c\udd71'))
+    );
+  } catch ( e ) {
+    assertEquals('Deleting similar surrogate pair in the middle', 'crashed');
+  }
+
+  try {
+    assertEquivalent(
+      dmp.diff_toDelta([[DIFF_DELETE, '\ud83c\udd70'], [DIFF_INSERT, '\ud83c\udd71']]),
+      dmp.diff_toDelta([[DIFF_EQUAL, '\ud83c'], [DIFF_DELETE, '\udd70'], [DIFF_INSERT, '\udd71']]),
+    );
+  } catch ( e ) {
+    assertEquals('Swap surrogate pair', 'crashed');
+  }
+
+  try {
+    assertEquivalent(
+      dmp.diff_toDelta([[DIFF_INSERT, '\ud83c\udd70'], [DIFF_DELETE, '\ud83c\udd71']]),
+      dmp.diff_toDelta([[DIFF_EQUAL, '\ud83c'], [DIFF_INSERT, '\udd70'], [DIFF_DELETE, '\udd71']]),
+    );
+  } catch ( e ) {
+    assertEquals('Swap surrogate pair', 'crashed');
+  }
+
+  // Empty diff groups
+  assertEquivalent(
+    dmp.diff_toDelta([[DIFF_EQUAL, 'abcdef'], [DIFF_DELETE, ''], [DIFF_INSERT, 'ghijk']]),
+    dmp.diff_toDelta([[DIFF_EQUAL, 'abcdef'], [DIFF_INSERT, 'ghijk']]),
+  );
+
+  // Different versions of the library may have created deltas with
+  // half of a surrogate pair encoded as if it were valid UTF-8
+  try {
+    assertEquivalent(
+      dmp.diff_toDelta(dmp.diff_fromDelta('\ud83c\udd70', '-2\t+%F0%9F%85%B1')),
+      dmp.diff_toDelta(dmp.diff_fromDelta('\ud83c\udd70', '=1\t-1\t+%ED%B5%B1'))
+    );
+  } catch ( e ) {
+    assertEquals('Decode UTF8-encoded surrogate half', 'crashed');
+  }
+
   // Verify pool of unchanged characters.
   diffs = [[DIFF_INSERT, 'A-Z a-z 0-9 - _ . ! ~ * \' ( ) ; / ? : @ & = + $ , # ']];
   var text2 = dmp.diff_text2(diffs);

From 13309a1978e804d50a0c46caea97b6c3e82ddccf Mon Sep 17 00:00:00 2001
From: Dennis Snell <dmsnell@xkq.io>
Date: Tue, 30 Jan 2024 16:52:44 -0700
Subject: [PATCH 2/5] Java: Stop breaking surrogate pairs in toDelta()

Resolves Google/diff-match-patch#69 for Java

Sometimes we can find a common prefix that runs into the middle of a
surrogate pair and we split that pair when building our diff groups.

This is fine as long as we are operating on UTF-16 code units. It
becomes problematic when we start trying to treat those substrings as
valid Unicode (or UTF-8) sequences.

When we pass these split groups into `toDelta()` we do just that and the
library crashes. In this patch we're post-processing the diff groups
before encoding them to make sure that we un-split the surrogate pairs.

The post-processed diffs should produce the same output when applying
the diffs. The diff string itself will be different but should not change
that much - only by a single character at surrogate boundaries.
---
 .../neil/plaintext/diff_match_patch.java      | 149 +++++++++++++++++-
 .../neil/plaintext/diff_match_patch_test.java |  36 +++++
 2 files changed, 177 insertions(+), 8 deletions(-)

diff --git a/java/src/name/fraser/neil/plaintext/diff_match_patch.java b/java/src/name/fraser/neil/plaintext/diff_match_patch.java
index 9d07867d..d467f7fa 100644
--- a/java/src/name/fraser/neil/plaintext/diff_match_patch.java
+++ b/java/src/name/fraser/neil/plaintext/diff_match_patch.java
@@ -19,6 +19,7 @@
 package name.fraser.neil.plaintext;
 
 import java.io.UnsupportedEncodingException;
+import java.lang.Character;
 import java.net.URLDecoder;
 import java.net.URLEncoder;
 import java.util.*;
@@ -1293,6 +1294,46 @@ public void diff_cleanupMerge(LinkedList<Diff> diffs) {
     }
   }
 
+  /**
+   * Rearrange diff boundaries that split Unicode surrogate pairs.
+   * @param diffs List of Diff objects; it is modified in place.
+   */
+  public void diff_cleanupSplitSurrogates(List<Diff> diffs) {
+    char lastEnd = 0;
+    boolean isFirst = true;
+    HashSet<Diff> toRemove = new HashSet<Diff>();
+
+    for (Diff aDiff : diffs) {
+      if (aDiff.text.isEmpty()) {
+        toRemove.add(aDiff);
+        continue;
+      }
+
+      char thisTop = aDiff.text.charAt(0);
+      char thisEnd = aDiff.text.charAt(aDiff.text.length() - 1);
+
+      if (Character.isHighSurrogate(thisEnd)) {
+        lastEnd = thisEnd;
+        aDiff.text = aDiff.text.substring(0, aDiff.text.length() - 1);
+      }
+
+      if (!isFirst && Character.isHighSurrogate(lastEnd) && Character.isLowSurrogate(thisTop)) {
+        aDiff.text = lastEnd + aDiff.text;
+      }
+      
+      isFirst = false;
+
+      if ( aDiff.text.isEmpty() ) {
+        toRemove.add(aDiff);
+        continue;
+      }
+    }
+
+    for (Diff aDiff : toRemove) {
+      diffs.remove(aDiff);
+    }
+  }
+
   /**
    * loc is a location in text1, compute and return the equivalent location in
    * text2.
@@ -1429,6 +1470,7 @@ public int diff_levenshtein(List<Diff> diffs) {
    */
   public String diff_toDelta(List<Diff> diffs) {
     StringBuilder text = new StringBuilder();
+    this.diff_cleanupSplitSurrogates(diffs);
     for (Diff aDiff : diffs) {
       switch (aDiff.operation) {
       case INSERT:
@@ -1457,6 +1499,103 @@ public String diff_toDelta(List<Diff> diffs) {
     return delta;
   }
 
+  private int digit16(char b) throws IllegalArgumentException {
+    switch (b) {
+      case '0': return 0;
+      case '1': return 1;
+      case '2': return 2;
+      case '3': return 3;
+      case '4': return 4;
+      case '5': return 5;
+      case '6': return 6;
+      case '7': return 7;
+      case '8': return 8;
+      case '9': return 9;
+      case 'A': case 'a': return 10;
+      case 'B': case 'b': return 11;
+      case 'C': case 'c': return 12;
+      case 'D': case 'd': return 13;
+      case 'E': case 'e': return 14;
+      case 'F': case 'f': return 15;
+      default:
+        throw new IllegalArgumentException();
+    }
+  }
+
+  /**
+   * Decode the %XX escapes in text as UTF-8. Unlike a strict decoder, a
+   * three-byte sequence that encodes a lone surrogate half is accepted.
+   * @param text URI-encoded text.
+   * @return The decoded string.
+   * @throws IllegalArgumentException If an escape is truncated or malformed.
+   */
+  private String decodeURI(String text) throws IllegalArgumentException {
+    int i = 0;
+    StringBuilder decoded = new StringBuilder(text.length());
+
+    while (i < text.length()) {
+      if (text.charAt(i) != '%') {
+        decoded.append(text.charAt(i++));
+        continue;
+      }
+
+      // Start a percent-sequence; the lead byte decides how many bytes follow.
+      int byte1 = percentByte(text, i);
+      if ((byte1 & 0x80) == 0) {
+        decoded.append(Character.toChars(byte1));
+        i += 3;
+        continue;
+      }
+
+      int byte2 = continuationBits(text, i + 3);
+      if ((byte1 & 0xE0) == 0xC0) {
+        decoded.append(Character.toChars(((byte1 & 0x1F) << 6) | byte2));
+        i += 6;
+        continue;
+      }
+
+      int byte3 = continuationBits(text, i + 6);
+      if ((byte1 & 0xF0) == 0xE0) {
+        // Unpaired surrogates are fine here.
+        decoded.append(Character.toChars(((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3));
+        i += 9;
+        continue;
+      }
+
+      int byte4 = continuationBits(text, i + 9);
+      if ((byte1 & 0xF8) == 0xF0) {
+        int codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) | (byte3 << 0x06) | byte4;
+        if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) {
+          // appendCodePoint writes the surrogate pair for any supplementary value.
+          decoded.appendCodePoint(codePoint);
+          i += 12;
+          continue;
+        }
+      }
+
+      throw new IllegalArgumentException();
+    }
+
+    return decoded.toString();
+  }
+
+  /** Read the byte written as "%XX" at index at; throw if truncated or not hex. */
+  private int percentByte(String text, int at) throws IllegalArgumentException {
+    if (at + 2 >= text.length() || text.charAt(at) != '%') {
+      throw new IllegalArgumentException();
+    }
+    return (digit16(text.charAt(at + 1)) << 4) + digit16(text.charAt(at + 2));
+  }
+
+  /** Read "%XX" at index at as a UTF-8 continuation byte; return its low six bits. */
+  private int continuationBits(String text, int at) throws IllegalArgumentException {
+    int value = percentByte(text, at);
+    if ((value & 0xC0) != 0x80) {
+      throw new IllegalArgumentException();
+    }
+    return value & 0x3F;
+  }
+
   /**
    * Given the original text1, and an encoded string which describes the
    * operations required to transform text1 into text2, compute the full diff.
@@ -1483,10 +1622,7 @@ public LinkedList<Diff> diff_fromDelta(String text1, String delta)
         // decode would change all "+" to " "
         param = param.replace("+", "%2B");
         try {
-          param = URLDecoder.decode(param, "UTF-8");
-        } catch (UnsupportedEncodingException e) {
-          // Not likely on modern system.
-          throw new Error("This system does not support UTF-8.", e);
+          param = this.decodeURI(param);
         } catch (IllegalArgumentException e) {
           // Malformed URI sequence.
           throw new IllegalArgumentException(
@@ -2269,10 +2405,7 @@ public List<Patch> patch_fromText(String textline)
         line = text.getFirst().substring(1);
         line = line.replace("+", "%2B");  // decode would change all "+" to " "
         try {
-          line = URLDecoder.decode(line, "UTF-8");
-        } catch (UnsupportedEncodingException e) {
-          // Not likely on modern system.
-          throw new Error("This system does not support UTF-8.", e);
+          line = this.decodeURI(line);
         } catch (IllegalArgumentException e) {
           // Malformed URI sequence.
           throw new IllegalArgumentException(
diff --git a/java/tests/name/fraser/neil/plaintext/diff_match_patch_test.java b/java/tests/name/fraser/neil/plaintext/diff_match_patch_test.java
index 2f387933..5be10f13 100644
--- a/java/tests/name/fraser/neil/plaintext/diff_match_patch_test.java
+++ b/java/tests/name/fraser/neil/plaintext/diff_match_patch_test.java
@@ -424,6 +424,42 @@ public static void testDiffDelta() {
 
     assertEquals("diff_fromDelta: Unicode.", diffs, dmp.diff_fromDelta(text1, delta));
 
+    diffs = diffList(new Diff(EQUAL, "\ud83d\ude4b\ud83d"), new Diff(INSERT, "\ude4c\ud83d"), new Diff(EQUAL, "\ude4b"));
+    delta = dmp.diff_toDelta(diffs);
+    assertEquals("diff_toDelta: Surrogate Pairs.", "=2\t+%F0%9F%99%8C\t=2", delta);
+
+    assertEquals(
+      "diff_toDelta: insert surrogate pair between similar high surrogates",
+      dmp.diff_toDelta(diffList(new Diff(EQUAL, "\ud83c\udd70"), new Diff(INSERT, "\ud83c\udd70"), new Diff(EQUAL, "\ud83c\udd71"))),
+      dmp.diff_toDelta(diffList(new Diff(EQUAL, "\ud83c\udd70\ud83c"), new Diff(INSERT, "\udd70\ud83c"), new Diff(EQUAL, "\udd71")))
+    );
+
+    assertEquals(
+      "diff_toDelta: swap surrogate pairs delete/insert",
+      dmp.diff_toDelta(diffList(new Diff(DELETE, "\ud83c\udd70"), new Diff(INSERT, "\ud83c\udd71"))),
+      dmp.diff_toDelta(diffList(new Diff(EQUAL, "\ud83c"), new Diff(DELETE, "\udd70"), new Diff(INSERT, "\udd71")))
+    );
+
+    assertEquals(
+      "diff_toDelta: swap surrogate pairs insert/delete",
+      dmp.diff_toDelta(diffList(new Diff(INSERT, "\ud83c\udd70"), new Diff(DELETE, "\ud83c\udd71"))),
+      dmp.diff_toDelta(diffList(new Diff(EQUAL, "\ud83c"), new Diff(INSERT, "\udd70"), new Diff(DELETE, "\udd71")))
+    );
+
+    assertEquals(
+      "diff_toDelta: empty diff groups",
+      dmp.diff_toDelta(diffList(new Diff(EQUAL, "abcdef"), new Diff(DELETE, ""), new Diff(INSERT, "ghijk"))),
+      dmp.diff_toDelta(diffList(new Diff(EQUAL, "abcdef"), new Diff(INSERT, "ghijk")))
+    );
+
+    // Different versions of the library may have created deltas with
+    // half of a surrogate pair encoded as if it were valid UTF-8
+    assertEquals(
+      "diff_toDelta: surrogate half encoded as UTF8",
+      dmp.diff_toDelta(dmp.diff_fromDelta("\ud83c\udd70", "-2\t+%F0%9F%85%B1")),
+      dmp.diff_toDelta(dmp.diff_fromDelta("\ud83c\udd70", "=1\t-1\t+%ED%B5%B1"))
+    );
+
     // Verify pool of unchanged characters.
     diffs = diffList(new Diff(INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # "));
     String text2 = dmp.diff_text2(diffs);

From dfadc9c073aa86a04860c82861391302a2ff7d4c Mon Sep 17 00:00:00 2001
From: Dennis Snell <dmsnell@xkq.io>
Date: Tue, 30 Jan 2024 16:53:15 -0700
Subject: [PATCH 3/5] Objective-C: Stop breaking surrogate pairs in toDelta()

Resolves Google/diff-match-patch#69 for Objective-C

Sometimes we can find a common prefix that runs into the middle of a
surrogate pair and we split that pair when building our diff groups.

This is fine as long as we are operating on UTF-16 code units. It
becomes problematic when we start trying to treat those substrings as
valid Unicode (or UTF-8) sequences.

When we pass these split groups into `toDelta()` we do just that and the
library crashes. In this patch we're post-processing the diff groups
before encoding them to make sure that we un-split the surrogate pairs.

The post-processed diffs should produce the same output when applying
the diffs. The diff string itself will be different but should not
change that much - only by a single character at surrogate boundaries.
---
 objectivec/DiffMatchPatch.m           | 193 +++++++++++++++++++++++++-
 objectivec/Tests/DiffMatchPatchTest.m |  67 +++++++++
 2 files changed, 259 insertions(+), 1 deletion(-)

diff --git a/objectivec/DiffMatchPatch.m b/objectivec/DiffMatchPatch.m
index 580f2656..0c56201d 100755
--- a/objectivec/DiffMatchPatch.m
+++ b/objectivec/DiffMatchPatch.m
@@ -1299,7 +1299,28 @@ - (NSString *)diff_text2:(NSMutableArray *)diffs;
 - (NSString *)diff_toDelta:(NSMutableArray *)diffs;
 {
   NSMutableString *delta = [NSMutableString string];
+  UniChar lastEnd = 0;
   for (Diff *aDiff in diffs) {
+    if (0 == [aDiff.text length]) {
+        continue;
+    }
+
+    UniChar thisTop = [aDiff.text characterAtIndex:0];
+    UniChar thisEnd = [aDiff.text characterAtIndex:([aDiff.text length]-1)];
+
+    if (CFStringIsSurrogateHighCharacter(thisEnd)) {
+      lastEnd = thisEnd;
+      aDiff.text = [aDiff.text substringToIndex:([aDiff.text length] - 1)];
+    }
+
+    if (0 != lastEnd && CFStringIsSurrogateHighCharacter(lastEnd) && CFStringIsSurrogateLowCharacter(thisTop)) {
+      aDiff.text = [NSString stringWithFormat:@"%C%@", lastEnd, aDiff.text];
+    }
+
+    if (0 == [aDiff.text length]) {
+      continue;
+    }
+
     switch (aDiff.operation) {
       case DIFF_INSERT:
         [delta appendFormat:@"+%@\t", [[aDiff.text diff_stringByAddingPercentEscapesForEncodeUriCompatibility]
@@ -1321,6 +1342,176 @@ - (NSString *)diff_toDelta:(NSMutableArray *)diffs;
   return delta;
 }
 
+/**
+ * Convert one hexadecimal digit character into its numeric value.
+ *
+ * @param c A UTF-16 code unit expected to be one of 0-9, A-F, or a-f.
+ * @return The value of the digit, in the range 0 through 15.
+ * @throws NSException if c is not a hexadecimal digit.
+ */
+- (NSUInteger)diff_digit16:(unichar)c
+{
+    if (c >= '0' && c <= '9') {
+        return c - '0';
+    }
+    if (c >= 'A' && c <= 'F') {
+        return 10 + (c - 'A');
+    }
+    if (c >= 'a' && c <= 'f') {
+        return 10 + (c - 'a');
+    }
+
+    // %C is the format specifier for a 16-bit unichar; %c expects an 8-bit char.
+    [NSException raise:@"Invalid percent-encoded string" format:@"%C is not a hex digit", c];
+    return 0;  // not reached: -raise:format: always throws, but the compiler cannot know
+}
+
+/**
+ * Decode a percent-encoded UTF-8 string into a string of UTF-16 code units.
+ * Unlike `stringByRemovingPercentEncoding`, this also accepts three-byte
+ * sequences that encode lone surrogate halves, which other diff-match-patch
+ * libraries may have produced; the halves are re-joined into valid Unicode
+ * once the text is reconstructed from the diffs.
+ *
+ * @param percentEncoded Text with UTF-8 bytes written as %XX escapes.
+ * @return The decoded string, or nil if the input is malformed or truncated.
+ */
+- (NSString *)diff_decodeURIWithText:(NSString *)percentEncoded
+{
+    // heap-backed: a stack VLA is undefined at length 0 and can overflow on large deltas
+    unichar *decoded = (unichar *)[[NSMutableData dataWithLength:([percentEncoded length] + 1) * sizeof(unichar)] mutableBytes];
+    NSInteger input = 0;
+    NSInteger output = 0;
+    @try {
+        while (input < [percentEncoded length]) {
+            unichar c = [percentEncoded characterAtIndex:input];
+
+            // not an escape, so copy the code unit through unchanged
+            if ('%' != c) {
+                decoded[output++] = c;
+                input += 1;
+                continue;
+            }
+
+            NSUInteger byte1 = ([self diff_digit16:[percentEncoded characterAtIndex:(input+1)]] << 4) +
+                                [self diff_digit16:[percentEncoded characterAtIndex:(input+2)]];
+
+            // single-byte UTF-8 first byte has bitmask 0xxx xxxx
+            if ((byte1 & 0x80) == 0) {
+                decoded[output++] = byte1;
+                input += 3;
+                continue;
+            }
+
+            // at least one continuation byte
+            if ('%' != [percentEncoded characterAtIndex:(input + 3)]) {
+                return nil;
+            }
+
+            NSUInteger byte2 = ([self diff_digit16:[percentEncoded characterAtIndex:(input+4)]] << 4) +
+                                [self diff_digit16:[percentEncoded characterAtIndex:(input+5)]];
+
+            // continuation bytes have bitmask 10xx xxxx
+            if ((byte2 & 0xC0) != 0x80) {
+                return nil;
+            }
+
+            // continuation bytes thus only contribute six bits each
+            // these data bits are found with the bit mask xx11 1111
+            byte2 = byte2 & 0x3F;
+
+            // in two-byte sequences the first byte has bitmask 110x xxxx
+            if ((byte1 & 0xE0) == 0xC0) {
+                // byte1 ___x xxxx << 6
+                // byte2        __yy yyyy
+                // value    x xxxxyy yyyy -> 11 bits
+                decoded[output++] = ((byte1 & 0x1F) << 6) | byte2;
+                input += 6;
+                continue;
+            }
+
+            // at least two continuation bytes
+            if ('%' != [percentEncoded characterAtIndex:(input + 6)]) {
+                return nil;
+            }
+
+            NSUInteger byte3 = ([self diff_digit16:[percentEncoded characterAtIndex:(input+7)]] << 4) +
+                                [self diff_digit16:[percentEncoded characterAtIndex:(input+8)]];
+
+            if ((byte3 & 0xC0) != 0x80) {
+                return nil;
+            }
+
+            byte3 = byte3 & 0x3F;
+
+            // in three-byte sequences the first byte has bitmask 1110 xxxx
+            if ((byte1 & 0xF0) == 0xE0) {
+                // byte1 ____ xxxx << 12
+                // byte2        __yy yyyy << 6
+                // byte3               __zz zzzz
+                // value      xxxxyy yyyyzz zzzz -> 16 bits
+                decoded[output++] = ((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3;
+                input += 9;
+                continue;
+            }
+
+            // three continuation bytes
+            if ('%' != [percentEncoded characterAtIndex:(input + 9)]) {
+                return nil;
+            }
+
+            NSUInteger byte4 = ([self diff_digit16:[percentEncoded characterAtIndex:(input+10)]] << 4) +
+                                [self diff_digit16:[percentEncoded characterAtIndex:(input+11)]];
+
+            if ((byte4 & 0xC0) != 0x80) {
+                return nil;
+            }
+
+            byte4 = byte4 & 0x3F;
+
+            // in four-byte sequences the first byte has bitmask 1111 0xxx
+            if ((byte1 & 0xF8) == 0xF0) {
+                // byte1 ____ _xxx << 18
+                // byte2        __yy yyyy << 12
+                // byte3               __zz zzzz << 6
+                // byte4                      __tt tttt
+                // value       xxxyy yyyyzz zzzztt tttt -> 21 bits
+                NSUInteger codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) | (byte3 << 0x06) | byte4;
+                if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) {
+                    codePoint -= 0x010000;
+                    decoded[output++] = ((codePoint >> 10) & 0x3FF) | 0xD800;
+                    decoded[output++] = 0xDC00 | (codePoint & 0x3FF);
+                    input += 12;
+                    continue;
+                }
+            }
+
+            return nil;
+        }
+    }
+    @catch (NSException *e) {
+        return nil;
+    }
+
+    // some objective-c versions of the library produced patches with
+    // (null) in the place where surrogates were split across diff
+    // boundaries. if we leave those in we'll be stuck with a
+    // high-surrogate (null) low-surrogate pattern that will break
+    // deeper in the library or consuming application. we'll "fix"
+    // these by dropping the (null) and re-joining the surrogate halves
+    NSString *result = [NSString stringWithCharacters:decoded length:output];
+    NSRegularExpression *replacer = [NSRegularExpression
+                                     regularExpressionWithPattern:@"([\\x{D800}-\\x{DBFF}])\\(null\\)([\\x{DC00}-\\x{DFFF}])"
+                                     options:0
+                                     error:nil];
+
+    return [replacer
+            stringByReplacingMatchesInString:result
+            options:0
+            range:NSMakeRange(0, [result length])
+            withTemplate:@"$1$2"];
+}
+
 /**
  * Given the original text1, and an encoded NSString which describes the
  * operations required to transform text1 into text2, compute the full diff.
@@ -1348,7 +1539,7 @@ - (NSMutableArray *)diff_fromDeltaWithText:(NSString *)text1
     NSString *param = [token substringFromIndex:1];
     switch ([token characterAtIndex:0]) {
       case '+':
-        param = [param diff_stringByReplacingPercentEscapesForEncodeUriCompatibility];
+        param = [self diff_decodeURIWithText:param];
         if (param == nil) {
           if (error != NULL) {
             errorDetail = [NSDictionary dictionaryWithObjectsAndKeys:
diff --git a/objectivec/Tests/DiffMatchPatchTest.m b/objectivec/Tests/DiffMatchPatchTest.m
index 9697b04c..7e31508b 100755
--- a/objectivec/Tests/DiffMatchPatchTest.m
+++ b/objectivec/Tests/DiffMatchPatchTest.m
@@ -752,6 +752,68 @@ - (void)test_diff_deltaTest {
 
   XCTAssertEqualObjects(diffs, [dmp diff_fromDeltaWithText:text1 andDelta:delta error:NULL], @"diff_fromDelta: Unicode 2.");
 
+  diffs = [dmp diff_mainOfOldString:@"☺️🖖🏿" andNewString:@"☺️😃🖖🏿"];
+  delta = [dmp diff_toDelta:diffs];
+
+  XCTAssertEqualObjects(delta, @"=2\t+%F0%9F%98%83\t=4", @"Delta should match the expected string");
+
+  diffs = [dmp diff_mainOfOldString:@"☺️🖖🏿" andNewString:@"☺️😃🖖🏿"];
+  NSArray *patches = [dmp patch_makeFromDiffs:diffs];
+  NSArray *patchResult = [dmp patch_apply:patches toString:@"☺️🖖🏿"];
+
+  expectedString = [patchResult firstObject];
+  XCTAssertEqualObjects(@"☺️😃🖖🏿", expectedString, @"Output String should match the Edited one!");
+
+  // Unicode - splitting surrogates
+
+  // Inserting similar surrogate pair at beginning
+  diffs = [NSMutableArray arrayWithObjects:
+           [Diff diffWithOperation:DIFF_INSERT andText:@"🅱"],
+           [Diff diffWithOperation:DIFF_EQUAL andText:@"🅰🅱"],
+           nil];
+  XCTAssertEqualObjects( [dmp diff_toDelta:diffs], [dmp diff_toDelta:[dmp diff_mainOfOldString:@"🅰🅱" andNewString:@"🅱🅰🅱"]]);
+
+  // Inserting similar surrogate pair in the middle
+  diffs = [NSMutableArray arrayWithObjects:
+           [Diff diffWithOperation:DIFF_EQUAL andText:@"🅰"],
+           [Diff diffWithOperation:DIFF_INSERT andText:@"🅰"],
+           [Diff diffWithOperation:DIFF_EQUAL andText:@"🅱"],
+           nil];
+  XCTAssertEqualObjects( [dmp diff_toDelta:diffs], [dmp diff_toDelta:[dmp diff_mainOfOldString:@"🅰🅱" andNewString:@"🅰🅰🅱"]]);
+
+  // Deleting similar surrogate pair at the beginning
+  diffs = [NSMutableArray arrayWithObjects:
+           [Diff diffWithOperation:DIFF_DELETE andText:@"🅱"],
+           [Diff diffWithOperation:DIFF_EQUAL andText:@"🅰🅱"],
+           nil];
+  XCTAssertEqualObjects( [dmp diff_toDelta:diffs], [dmp diff_toDelta:[dmp diff_mainOfOldString:@"🅱🅰🅱" andNewString:@"🅰🅱"]]);
+
+  // Deleting similar surrogate pair in the middle
+  diffs = [NSMutableArray arrayWithObjects:
+           [Diff diffWithOperation:DIFF_EQUAL andText:@"🅰"],
+           [Diff diffWithOperation:DIFF_DELETE andText:@"🅲"],
+           [Diff diffWithOperation:DIFF_EQUAL andText:@"🅱"],
+           nil];
+  XCTAssertEqualObjects( [dmp diff_toDelta:diffs], [dmp diff_toDelta:[dmp diff_mainOfOldString:@"🅰🅲🅱" andNewString:@"🅰🅱"]]);
+
+  // Swapping surrogate pairs
+  diffs = [NSMutableArray arrayWithObjects:
+           [Diff diffWithOperation:DIFF_DELETE andText:@"🅰"],
+           [Diff diffWithOperation:DIFF_INSERT andText:@"🅱"],
+           nil];
+  XCTAssertEqualObjects( [dmp diff_toDelta:diffs], [dmp diff_toDelta:[dmp diff_mainOfOldString:@"🅰" andNewString:@"🅱"]]);
+
+  // Swapping surrogate pairs
+  XCTAssertEqualObjects( [dmp diff_toDelta:([NSMutableArray arrayWithObjects:
+                                            [Diff diffWithOperation:DIFF_DELETE andText:@"🅰"],
+                                            [Diff diffWithOperation:DIFF_INSERT andText:@"🅱"],
+                                             nil])],
+                        [dmp diff_toDelta:([NSMutableArray arrayWithObjects:
+                                            [Diff diffWithOperation:DIFF_EQUAL andText:[NSString stringWithFormat:@"%C", 0xd83c]],
+                                            [Diff diffWithOperation:DIFF_DELETE andText:[NSString stringWithFormat:@"%C", 0xdd70]],
+                                            [Diff diffWithOperation:DIFF_INSERT andText:[NSString stringWithFormat:@"%C", 0xdd71]],
+                                            nil])]);
+
   // Verify pool of unchanged characters.
   diffs = [NSMutableArray arrayWithObject:
        [Diff diffWithOperation:DIFF_INSERT andText:@"A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # "]];
@@ -781,6 +843,11 @@ - (void)test_diff_deltaTest {
   expectedResult = [dmp diff_fromDeltaWithText:@"" andDelta:delta error:NULL];
   XCTAssertEqualObjects(diffs, expectedResult, @"diff_fromDelta: 160kb string. Convert delta string into a diff.");
 
+  // Different versions of the library may have created deltas with
+  // half of a surrogate pair encoded as if it were valid UTF-8
+  XCTAssertEqualObjects([dmp diff_toDelta:([dmp diff_fromDeltaWithText:@"🅰" andDelta:@"-2\t+%F0%9F%85%B1" error:NULL])],
+                        [dmp diff_toDelta:([dmp diff_fromDeltaWithText:@"🅰" andDelta:@"=1\t-1\t+%ED%B5%B1" error:NULL])]);
+
   [dmp release];
 }
 

From db1cbba2800fd0b30c71adb39a2066d46c37cfc3 Mon Sep 17 00:00:00 2001
From: Dennis Snell <dmsnell@xkq.io>
Date: Tue, 30 Jan 2024 16:53:42 -0700
Subject: [PATCH 4/5] Python2: Stop breaking surrogate pairs in toDelta()

Resolves Google/diff-match-patch#69 for Python2

Sometimes we can find a common prefix that runs into the middle of a
surrogate pair and we split that pair when building our diff groups.

This is fine as long as we are operating on UTF-16 code units. It
becomes problematic when we start trying to treat those substrings as
valid Unicode (or UTF-8) sequences.

When we pass these split groups into `toDelta()` we do just that and the
library crashes. In this patch we're post-processing the diff groups
before encoding them to make sure that we un-split the surrogate pairs.

The post-processed diffs should produce the same output when applying
the diffs. The diff string itself will be different but should not
change that much - only by a single character at surrogate boundaries.
---
 python2/diff_match_patch.py            | 34 +++++++++--
 python2/tests/diff_match_patch_test.py | 80 ++++++++++++++++++++++++++
 2 files changed, 110 insertions(+), 4 deletions(-)

diff --git a/python2/diff_match_patch.py b/python2/diff_match_patch.py
index 806fe1e6..8b26125c 100644
--- a/python2/diff_match_patch.py
+++ b/python2/diff_match_patch.py
@@ -28,6 +28,7 @@
 __author__ = 'fraser@google.com (Neil Fraser)'
 
 import re
+import struct
 import sys
 import time
 import urllib
@@ -1135,6 +1136,14 @@ def diff_levenshtein(self, diffs):
     levenshtein += max(insertions, deletions)
     return levenshtein
 
+  @classmethod
+  def is_high_surrogate(cls, c):  # c is one UTF-16BE code unit (two bytes)
+    return struct.unpack('>H', c)[0] & 0xfc00 == 0xd800  # 1101 10xx xxxx xxxx
+
+  @classmethod
+  def is_low_surrogate(cls, c):  # c is one UTF-16BE code unit (two bytes)
+    return struct.unpack('>H', c)[0] & 0xfc00 == 0xdc00  # 1101 11xx xxxx xxxx
+
   def diff_toDelta(self, diffs):
     """Crush the diff into an encoded string which describes the operations
     required to transform text1 into text2.
@@ -1148,15 +1157,32 @@ def diff_toDelta(self, diffs):
       Delta text.
     """
     text = []
+    last_end = None
     for (op, data) in diffs:
+      if 0 == len(data):
+        continue
+
+      encoded = data.encode('utf-16be')
+      this_top = encoded[0:2]
+      this_end = encoded[-2:]
+
+      if self.is_high_surrogate(this_end):
+        last_end = this_end
+        encoded = encoded[0:-2]
+
+      if last_end and self.is_high_surrogate(last_end) and self.is_low_surrogate(this_top):
+        encoded = last_end + encoded
+
+      if 0 == len(encoded):
+        continue
+
       if op == self.DIFF_INSERT:
         # High ascii will raise UnicodeDecodeError.  Use Unicode instead.
-        data = data.encode("utf-8")
-        text.append("+" + urllib.quote(data, "!~*'();/?:@&=+$,# "))
+        text.append("+" + urllib.quote(encoded.decode('utf-16be').encode('utf-8'), "!~*'();/?:@&=+$,# "))
       elif op == self.DIFF_DELETE:
-        text.append("-%d" % len(data))
+        text.append("-%d" % (len(encoded) // 2))
       elif op == self.DIFF_EQUAL:
-        text.append("=%d" % len(data))
+        text.append("=%d" % (len(encoded) // 2))
     return "\t".join(text)
 
   def diff_fromDelta(self, text1, delta):
diff --git a/python2/tests/diff_match_patch_test.py b/python2/tests/diff_match_patch_test.py
index 661a6b67..94f5fd35 100644
--- a/python2/tests/diff_match_patch_test.py
+++ b/python2/tests/diff_match_patch_test.py
@@ -441,6 +441,86 @@ def testDiffDelta(self):
     # Convert delta string into a diff.
     self.assertEquals(diffs, self.dmp.diff_fromDelta(text1, delta))
 
+    diffs = [(self.dmp.DIFF_EQUAL, u"\ud83d\ude4b\ud83d"), (self.dmp.DIFF_INSERT, u"\ude4c\ud83d"), (self.dmp.DIFF_EQUAL, u"\ude4b")]
+    delta = self.dmp.diff_toDelta(diffs)
+    self.assertEquals("=2\t+%F0%9F%99%8C\t=2", delta)
+
+    # Unicode: split surrogates
+    # Inserting similar surrogate pair at beginning
+    self.assertEquals(
+      self.dmp.diff_toDelta([
+        (self.dmp.DIFF_INSERT, u'\U0001F171'),
+        (self.dmp.DIFF_EQUAL, u'\U0001F170\U0001F171')
+      ]),
+      self.dmp.diff_toDelta(self.dmp.diff_main(
+        u'\U0001F170\U0001F171',
+        u'\U0001F171\U0001F170\U0001F171'
+      ))
+    )
+
+    # Inserting similar surrogate pair in the middle
+    self.assertEquals(
+      self.dmp.diff_toDelta([
+        (self.dmp.DIFF_EQUAL, u'\U0001F170'),
+        (self.dmp.DIFF_INSERT, u'\U0001F172'),
+        (self.dmp.DIFF_EQUAL, u'\U0001F171')
+      ]),
+      self.dmp.diff_toDelta(self.dmp.diff_main(
+        u'\U0001F170\U0001F171',
+        u'\U0001F170\U0001F172\U0001F171'
+      ))
+    )
+
+    # Deleting similar surrogate pair at the beginning
+    self.assertEquals(
+      self.dmp.diff_toDelta([
+        (self.dmp.DIFF_DELETE, u'\U0001F171'),
+        (self.dmp.DIFF_EQUAL, u'\U0001F170\U0001F171')
+      ]),
+      self.dmp.diff_toDelta(self.dmp.diff_main(
+        u'\U0001F171\U0001F170\U0001F171',
+        u'\U0001F170\U0001F171'
+      ))
+    )
+
+    # Deleting similar surrogate pair in the middle
+    self.assertEquals(
+      self.dmp.diff_toDelta([
+        (self.dmp.DIFF_EQUAL, u'\U0001F170'),
+        (self.dmp.DIFF_DELETE, u'\U0001F172'),
+        (self.dmp.DIFF_EQUAL, u'\U0001F171')
+      ]),
+      self.dmp.diff_toDelta(self.dmp.diff_main(
+        u'\U0001F170\U0001F172\U0001F171',
+        u'\U0001F170\U0001F171'
+      ))
+    )
+
+    # Swap surrogate pair
+    self.assertEquals(
+      self.dmp.diff_toDelta([
+        (self.dmp.DIFF_DELETE, u'\U0001F170'),
+        (self.dmp.DIFF_INSERT, u'\U0001F171')
+      ]),
+      self.dmp.diff_toDelta(self.dmp.diff_main(
+        u'\U0001F170',
+        u'\U0001F171'
+      ))
+    )
+
+    # Swap surrogate pair, force the invalid diff groups
+    self.assertEquals(
+      self.dmp.diff_toDelta([
+        (self.dmp.DIFF_INSERT, u'\U0001F170'),
+        (self.dmp.DIFF_DELETE, u'\U0001F171')
+      ]),
+      self.dmp.diff_toDelta([
+        (self.dmp.DIFF_EQUAL, u'\ud83c'),
+        (self.dmp.DIFF_INSERT, u'\udd70'),
+        (self.dmp.DIFF_DELETE, u'\udd71')
+      ])
+    )
+
     # Verify pool of unchanged characters.
     diffs = [(self.dmp.DIFF_INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ")]
     text2 = self.dmp.diff_text2(diffs)

From 50f15423d886660436d57144535afe2ef6c15987 Mon Sep 17 00:00:00 2001
From: Dennis Snell <dmsnell@xkq.io>
Date: Tue, 30 Jan 2024 16:54:05 -0700
Subject: [PATCH 5/5] Python3: Stop breaking surrogate pairs in toDelta()

Resolves Google/diff-match-patch#69 for Python3

Sometimes we can find a common prefix that runs into the middle of a
surrogate pair and we split that pair when building our diff groups.

This is fine as long as we are operating on UTF-16 code units. It
becomes problematic when we start trying to treat those substrings as
valid Unicode (or UTF-8) sequences.

When we pass these split groups into `toDelta()` we do just that and the
library crashes. In this patch we're post-processing the diff groups
before encoding them to make sure that we un-split the surrogate pairs.

The post-processed diffs should produce the same output when applying
the diffs. The diff string itself will be different but should not
change that much - only by a single character at surrogate boundaries.
---
 python3/diff_match_patch.py            | 19 ++++---
 python3/tests/diff_match_patch_test.py | 70 ++++++++++++++++++++++++++
 2 files changed, 82 insertions(+), 7 deletions(-)

diff --git a/python3/diff_match_patch.py b/python3/diff_match_patch.py
index cc7f5907..3bf825c5 100644
--- a/python3/diff_match_patch.py
+++ b/python3/diff_match_patch.py
@@ -26,6 +26,7 @@
 __author__ = 'fraser@google.com (Neil Fraser)'
 
 import re
+import struct
 import sys
 import time
 import urllib.parse
@@ -1147,14 +1148,17 @@ def diff_toDelta(self, diffs):
     """
     text = []
     for (op, data) in diffs:
+      if 0 == len(data):
+        continue
+
       if op == self.DIFF_INSERT:
         # High ascii will raise UnicodeDecodeError.  Use Unicode instead.
         data = data.encode("utf-8")
         text.append("+" + urllib.parse.quote(data, "!~*'();/?:@&=+$,# "))
       elif op == self.DIFF_DELETE:
-        text.append("-%d" % len(data))
+        text.append("-%d" % (len(data.encode('utf-16-be')) // 2))
       elif op == self.DIFF_EQUAL:
-        text.append("=%d" % len(data))
+        text.append("=%d" % (len(data.encode('utf-16-be')) // 2))
     return "\t".join(text)
 
   def diff_fromDelta(self, text1, delta):
@@ -1172,7 +1176,8 @@ def diff_fromDelta(self, text1, delta):
       ValueError: If invalid input.
     """
     diffs = []
-    pointer = 0  # Cursor in text1
+    as_utf16 = text1.encode('utf-16-be')
+    pointer = 0  # Cursor in as_utf16
     tokens = delta.split("\t")
     for token in tokens:
       if token == "":
@@ -1191,8 +1196,8 @@ def diff_fromDelta(self, text1, delta):
           raise ValueError("Invalid number in diff_fromDelta: " + param)
         if n < 0:
           raise ValueError("Negative number in diff_fromDelta: " + param)
-        text = text1[pointer : pointer + n]
-        pointer += n
+        text = as_utf16[pointer : pointer + n * 2].decode('utf-16-be')
+        pointer += n * 2
         if token[0] == "=":
           diffs.append((self.DIFF_EQUAL, text))
         else:
@@ -1201,10 +1206,10 @@ def diff_fromDelta(self, text1, delta):
         # Anything else is an error.
         raise ValueError("Invalid diff operation in diff_fromDelta: " +
             token[0])
-    if pointer != len(text1):
+    if pointer != len(as_utf16):
       raise ValueError(
           "Delta length (%d) does not equal source text length (%d)." %
-         (pointer, len(text1)))
+         (pointer, len(as_utf16)))
     return diffs
 
   #  MATCH FUNCTIONS
diff --git a/python3/tests/diff_match_patch_test.py b/python3/tests/diff_match_patch_test.py
index 3659d3e7..4ff16abd 100644
--- a/python3/tests/diff_match_patch_test.py
+++ b/python3/tests/diff_match_patch_test.py
@@ -18,6 +18,7 @@
 """
 
 import imp
+import json
 import os
 import sys
 import time
@@ -444,6 +445,12 @@ def testDiffDelta(self):
     # Convert delta string into a diff.
     self.assertEqual(diffs, self.dmp.diff_fromDelta(text1, delta))
 
+    diffs = self.dmp.diff_main("\U0001F64B\U0001F64B", "\U0001F64B\U0001F64C\U0001F64B")
+    delta = self.dmp.diff_toDelta(diffs)
+    self.assertEqual("=2\t+%F0%9F%99%8C\t=2", delta)
+
+    self.assertEqual(diffs, self.dmp.diff_fromDelta("\U0001F64B\U0001F64B", "=2\t+%F0%9F%99%8C\t=2"))
+
     # Verify pool of unchanged characters.
     diffs = [(self.dmp.DIFF_INSERT, "A-Z a-z 0-9 - _ . ! ~ * ' ( ) ; / ? : @ & = + $ , # ")]
     text2 = self.dmp.diff_text2(diffs)
@@ -455,6 +462,69 @@ def testDiffDelta(self):
     # Convert delta string into a diff.
     self.assertEqual(diffs, self.dmp.diff_fromDelta("", delta))
 
+    # Unicode: split surrogates
+    self.assertEqual(
+      self.dmp.diff_toDelta([
+        (self.dmp.DIFF_INSERT, '\U0001F171'),
+        (self.dmp.DIFF_EQUAL, '\U0001F170\U0001F171')
+      ]),
+      self.dmp.diff_toDelta(self.dmp.diff_main(
+        '\U0001F170\U0001F171',
+        '\U0001F171\U0001F170\U0001F171'
+      )),
+      'Inserting similar surrogate pair at beginning'
+    )
+
+    self.assertEqual(
+      self.dmp.diff_toDelta([
+        (self.dmp.DIFF_EQUAL, '\U0001F170'),
+        (self.dmp.DIFF_INSERT, '\U0001F172'),
+        (self.dmp.DIFF_EQUAL, '\U0001F171')
+      ]),
+      self.dmp.diff_toDelta(self.dmp.diff_main(
+        '\U0001F170\U0001F171',
+        '\U0001F170\U0001F172\U0001F171'
+      )),
+      'Inserting similar surrogate pair in the middle'
+    )
+
+    self.assertEqual(
+      self.dmp.diff_toDelta([
+        (self.dmp.DIFF_DELETE, '\U0001F171'),
+        (self.dmp.DIFF_EQUAL, '\U0001F170\U0001F171')
+      ]),
+      self.dmp.diff_toDelta(self.dmp.diff_main(
+        '\U0001F171\U0001F170\U0001F171',
+        '\U0001F170\U0001F171'
+      )),
+      'Deleting similar surogate pair at the beginning'
+    )
+
+    self.assertEqual(
+      self.dmp.diff_toDelta([
+        (self.dmp.DIFF_EQUAL, '\U0001F170'),
+        (self.dmp.DIFF_DELETE, '\U0001F172'),
+        (self.dmp.DIFF_EQUAL, '\U0001F171')
+      ]),
+      self.dmp.diff_toDelta(self.dmp.diff_main(
+        '\U0001F170\U0001F172\U0001F171',
+        '\U0001F170\U0001F171'
+      )),
+      'Deleting similar surogate pair in the middle'
+    )
+
+    self.assertEqual(
+      self.dmp.diff_toDelta([
+        (self.dmp.DIFF_DELETE, '\U0001F170'),
+        (self.dmp.DIFF_INSERT, '\U0001F171')
+      ]),
+      self.dmp.diff_toDelta(self.dmp.diff_main(
+        '\U0001F170',
+        '\U0001F171'
+      )),
+      'Swap surrogate pair'
+    )
+
     # 160 kb string.
     a = "abcdefghij"
     for i in range(14):