From 191b6987f03274ce8b1eb25a1424a8da8e221901 Mon Sep 17 00:00:00 2001 From: Dennis Snell Date: Wed, 10 May 2023 16:33:33 +0200 Subject: [PATCH] Expand padding of patch creation to avoid splitting surrogates --- javascript/diff_match_patch.js | 45 +-- javascript/diff_match_patch_uncompressed.js | 332 +++++++++++--------- 2 files changed, 203 insertions(+), 174 deletions(-) diff --git a/javascript/diff_match_patch.js b/javascript/diff_match_patch.js index 8f33865a..56b12cd8 100644 --- a/javascript/diff_match_patch.js +++ b/javascript/diff_match_patch.js @@ -5,48 +5,49 @@ diff_match_patch.prototype.diff_compute_=function(a,b,d,c){if(!a)return[new diff a),new diff_match_patch.Diff(DIFF_INSERT,b)]:(e=this.diff_halfMatch_(a,b))?(b=e[1],f=e[3],a=e[4],e=this.diff_main(e[0],e[2],d,c),d=this.diff_main(b,f,d,c),e.concat([new diff_match_patch.Diff(DIFF_EQUAL,a)],d)):d&&100d);t++){for(var v=-t+p;v<=t-x;v+=2){var n=f+v;var r=v==-t||v!=t&&h[n-1]c)x+=2;else if(y>e)p+=2;else if(m&&(n=f+k-v,0<=n&&n= -u)return this.diff_bisectSplit_(a,b,r,y,d)}}for(v=-t+w;v<=t-q;v+=2){n=f+v;u=v==-t||v!=t&&l[n-1]c)q+=2;else if(r>e)w+=2;else if(!m&&(n=f+k-v,0<=n&&n=u)))return this.diff_bisectSplit_(a,b,r,y,d)}}return[new diff_match_patch.Diff(DIFF_DELETE,a),new diff_match_patch.Diff(DIFF_INSERT,b)]}; +diff_match_patch.prototype.diff_bisect_=function(a,b,d){for(var c=a.length,e=b.length,f=Math.ceil((c+e)/2),g=2*f,h=Array(g),l=Array(g),k=0;kd);u++){for(var v=-u+n;v<=u-w;v+=2){var q=f+v;var t=v==-u||v!=u&&h[q-1]c)w+=2;else if(y>e)n+=2;else if(m&&(q=f+k-v,0<=q&&q= +x)return this.diff_bisectSplit_(a,b,t,y,d)}}for(v=-u+p;v<=u-r;v+=2){q=f+v;x=v==-u||v!=u&&l[q-1]c)r+=2;else if(t>e)p+=2;else if(!m&&(q=f+k-v,0<=q&&q=x)))return this.diff_bisectSplit_(a,b,t,y,d)}}return[new diff_match_patch.Diff(DIFF_DELETE,a),new diff_match_patch.Diff(DIFF_INSERT,b)]}; diff_match_patch.prototype.diff_bisectSplit_=function(a,b,d,c,e){var f=a.substring(0,d),g=b.substring(0,c);a=a.substring(d);b=b.substring(c);f=this.diff_main(f,g,!1,e);e=this.diff_main(a,b,!1,e);return f.concat(e)}; -diff_match_patch.prototype.diff_linesToChars_=function(a,b){function d(a){for(var b="",d=0,g=-1,h=c.length;gc?a=a.substring(d-c):d=a.length?[h,k,l,m,g]:null}if(0>=this.Diff_Timeout)return null; +diff_match_patch.prototype.diff_halfMatch_=function(a,b){function d(k,m,n){for(var w=k.substring(n,n+Math.floor(k.length/4)),p=-1,r="",u,v,q,t;-1!=(p=m.indexOf(w,p+1));){var y=f.diff_commonPrefix(k.substring(n),m.substring(p)),x=f.diff_commonSuffix(k.substring(0,n),m.substring(0,p));r.length=k.length?[u,v,q,t,r]:null}if(0>=this.Diff_Timeout)return null; var c=a.length>b.length?a:b,e=a.length>b.length?b:a;if(4>c.length||2*e.lengthc[4].length?g:c:c:g;else return null;if(a.length>b.length){c=g[0];e=g[1];var h=g[2];var l=g[3]}else h=g[0],l=g[1],c=g[2],e=g[3];return[c,e,h,l,g[4]]}; diff_match_patch.prototype.diff_cleanupSemantic=function(a){for(var b=!1,d=[],c=0,e=null,f=0,g=0,h=0,l=0,k=0;f=e){if(c>=b.length/2||c>=d.length/2)a.splice(f,0,new diff_match_patch.Diff(DIFF_EQUAL,d.substring(0,c))),a[f-1][1]=b.substring(0,b.length-c),a[f+1][1]=d.substring(c),f++}else if(e>=b.length/2||e>=d.length/2)a.splice(f,0,new diff_match_patch.Diff(DIFF_EQUAL,b.substring(0,e))),a[f-1][0]=DIFF_INSERT,a[f-1][1]=d.substring(0,d.length-e),a[f+1][0]=DIFF_DELETE, a[f+1][1]=b.substring(e),f++;f++}f++}}; -diff_match_patch.prototype.diff_cleanupSemanticLossless=function(a){function b(a,b){if(!a||!b)return 6;var c=a.charAt(a.length-1),d=b.charAt(0),e=c.match(diff_match_patch.nonAlphaNumericRegex_),f=d.match(diff_match_patch.nonAlphaNumericRegex_),g=e&&c.match(diff_match_patch.whitespaceRegex_),h=f&&d.match(diff_match_patch.whitespaceRegex_);c=g&&c.match(diff_match_patch.linebreakRegex_);d=h&&d.match(diff_match_patch.linebreakRegex_);var k=c&&a.match(diff_match_patch.blanklineEndRegex_),l=d&&b.match(diff_match_patch.blanklineStartRegex_); -return k||l?5:c||d?4:e&&!g&&h?3:g||h?2:e||f?1:0}for(var d=1;d=k&&(k=m,g=c,h=e,l=f)}a[d-1][1]!=g&&(g?a[d-1][1]=g:(a.splice(d- +diff_match_patch.prototype.diff_cleanupSemanticLossless=function(a){function b(n,w){if(!n||!w)return 6;var p=n.charAt(n.length-1),r=w.charAt(0),u=p.match(diff_match_patch.nonAlphaNumericRegex_),v=r.match(diff_match_patch.nonAlphaNumericRegex_),q=u&&p.match(diff_match_patch.whitespaceRegex_),t=v&&r.match(diff_match_patch.whitespaceRegex_);p=q&&p.match(diff_match_patch.linebreakRegex_);r=t&&r.match(diff_match_patch.linebreakRegex_);var y=p&&n.match(diff_match_patch.blanklineEndRegex_),x=r&&w.match(diff_match_patch.blanklineStartRegex_); +return y||x?5:p||r?4:u&&!q&&t?3:q||t?2:u||v?1:0}for(var d=1;d=k&&(k=m,g=c,h=e,l=f)}a[d-1][1]!=g&&(g?a[d-1][1]=g:(a.splice(d- 1,1),d--),a[d][1]=h,l?a[d+1][1]=l:(a.splice(d+1,1),d--))}d++}};diff_match_patch.nonAlphaNumericRegex_=/[^a-zA-Z0-9]/;diff_match_patch.whitespaceRegex_=/\s/;diff_match_patch.linebreakRegex_=/[\r\n]/;diff_match_patch.blanklineEndRegex_=/\n\r?\n$/;diff_match_patch.blanklineStartRegex_=/^\r?\n\r?\n/; diff_match_patch.prototype.diff_cleanupEfficiency=function(a){for(var b=!1,d=[],c=0,e=null,f=0,g=!1,h=!1,l=!1,k=!1;fb)break;e=d;f=c}return a.length!=g&&a[g][0]===DIFF_DELETE?f:f+(b-e)}; -diff_match_patch.prototype.diff_prettyHtml=function(a){for(var b=[],d=/&/g,c=//g,f=/\n/g,g=0;g");switch(h){case DIFF_INSERT:b[g]=''+l+"";break;case DIFF_DELETE:b[g]=''+l+"";break;case DIFF_EQUAL:b[g]=""+l+""}}return b.join("")}; -diff_match_patch.prototype.diff_text1=function(a){for(var b=[],d=0;d=a};diff_match_patch.prototype.isLowSurrogate=function(a){a=a.charCodeAt(0);return 56320<=a&&57343>=a}; -diff_match_patch.prototype.diff_toDelta=function(a){for(var b=[],d,c=0;c=a}; +diff_match_patch.prototype.isLowSurrogate=function(a){a=a.charCodeAt(0);return 56320<=a&&57343>=a}; diff_match_patch.prototype.digit16=function(a){switch(a){case "0":return 0;case "1":return 1;case "2":return 2;case "3":return 3;case "4":return 4;case "5":return 5;case "6":return 6;case "7":return 7;case "8":return 8;case "9":return 9;case "A":case "a":return 10;case "B":case "b":return 11;case "C":case "c":return 12;case "D":case "d":return 13;case "E":case "e":return 14;case "F":case "f":return 15;default:throw Error("Invalid hex-code");}}; diff_match_patch.prototype.decodeURI=function(a){try{return decodeURI(a)}catch(h){for(var b=0,d="";b=c)){d+=String.fromCharCode((c&65535)>>>10&1023|55296);d+=String.fromCharCode(56320|c&1023); -b+=12;continue}throw new URIError("URI malformed");}}}}return d}}; +b+=12;continue}throw new URIError("URI malformed");}}}}return d}};diff_match_patch.prototype.diff_xIndex=function(a,b){var d=0,c=0,e=0,f=0,g;for(g=0;gb)break;e=d;f=c}return a.length!=g&&a[g][0]===DIFF_DELETE?f:f+(b-e)}; +diff_match_patch.prototype.diff_prettyHtml=function(a){a=this.diff_cleanupSplitSurrogates(a);for(var b=[],d=/&/g,c=//g,f=/\n/g,g=0;g");switch(h){case DIFF_INSERT:b[g]=''+l+"";break;case DIFF_DELETE:b[g]=''+l+"";break;case DIFF_EQUAL:b[g]=""+l+""}}return b.join("")}; +diff_match_patch.prototype.diff_text1=function(a){for(var b=[],d=0;dthis.Match_MaxBits)throw Error("Pattern too long for this browser.");var e=this.match_alphabet_(b),f=this,g=this.Match_Threshold,h=a.indexOf(b,d);-1!=h&&(g=Math.min(c(0,h),g),h=a.lastIndexOf(b,d+b.length),-1!=h&&(g=Math.min(c(0,h),g)));var l=1<=k;q--){var t=e[a.charAt(q-1)];m[q]=0===w?(m[q+1]<<1|1)&t:(m[q+1]<<1|1)&t|(x[q+1]|x[q])<<1|1|x[q+1];if(m[q]&l&&(t=c(w,q-1),t<=g))if(g=t,h=q-1,h>d)k=Math.max(1,2*d-h);else break}if(c(w+1,d)>g)break;x=m}return h}; +diff_match_patch.prototype.match_bitap_=function(a,b,d){function c(v,q){var t=v/b.length,y=Math.abs(d-q);return f.Match_Distance?t+y/f.Match_Distance:y?1:t}if(b.length>this.Match_MaxBits)throw Error("Pattern too long for this browser.");var e=this.match_alphabet_(b),f=this,g=this.Match_Threshold,h=a.indexOf(b,d);-1!=h&&(g=Math.min(c(0,h),g),h=a.lastIndexOf(b,d+b.length),-1!=h&&(g=Math.min(c(0,h),g)));var l=1<=k;r--){var u=e[a.charAt(r-1)];m[r]=0===p?(m[r+1]<<1|1)&u:(m[r+1]<<1|1)&u|(w[r+1]|w[r])<<1|1|w[r+1];if(m[r]&l&&(u=c(p,r-1),u<=g))if(g=u,h=r-1,h>d)k=Math.max(1,2*d-h);else break}if(c(p+1,d)>g)break;w=m}return h}; diff_match_patch.prototype.match_alphabet_=function(a){for(var b={},d=0;d=2*this.Patch_Margin&&e&&(this.patch_addContext_(a,h),d.push(a),a=new diff_match_patch.patch_obj,e=0,h=c,f=g)}k!==DIFF_INSERT&&(f+=m.length);k!==DIFF_DELETE&&(g+=m.length)}e&&(this.patch_addContext_(a,h),d.push(a));return d}; +if(0===b.length)return[];b=this.diff_cleanupSplitSurrogates(b);d=[];a=new diff_match_patch.patch_obj;for(var e=0,f=0,g=0,h=c,l=0;l=2*this.Patch_Margin&&e&&(this.patch_addContext_(a,h),d.push(a),a=new diff_match_patch.patch_obj,e=0,h=c,f=g)}k!==DIFF_INSERT&&(f+=m.length);k!==DIFF_DELETE&&(g+=m.length)}e&&(this.patch_addContext_(a,h),d.push(a));return d}; diff_match_patch.prototype.patch_deepCopy=function(a){for(var b=[],d=0;dthis.Match_MaxBits){var k=this.match_main(b,h.substring(0,this.Match_MaxBits),g);-1!=k&&(l=this.match_main(b,h.substring(h.length-this.Match_MaxBits),g+h.length-this.Match_MaxBits),-1==l||k>=l)&&(k=-1)}else k=this.match_main(b,h, -g);if(-1==k)e[f]=!1,c-=a[f].length2-a[f].length1;else if(e[f]=!0,c=k-g,g=-1==l?b.substring(k,k+h.length):b.substring(k,l+this.Match_MaxBits),h==g)b=b.substring(0,k)+this.diff_text2(a[f].diffs)+b.substring(k+h.length);else if(g=this.diff_main(h,g,!1),h.length>this.Match_MaxBits&&this.diff_levenshtein(g)/h.length>this.Patch_DeleteThreshold)e[f]=!1;else{this.diff_cleanupSemanticLossless(g);h=0;var m;for(l=0;lthis.Match_MaxBits&&this.diff_levenshtein(g)/h.length>this.Patch_DeleteThreshold)e[f]=!1;else{this.diff_cleanupSemanticLossless(g);h=0;var m;for(l=0;le[0][1].length){var f=b-e[0][1].length;e[0][1]=d.substring(e[0][1].length)+e[0][1];c.start1-=f;c.start2-=f;c.length1+=f;c.length2+=f}c=a[a.length-1];e=c.diffs; 0==e.length||e[e.length-1][0]!=DIFF_EQUAL?(e.push(new diff_match_patch.Diff(DIFF_EQUAL,d)),c.length1+=b,c.length2+=b):b>e[e.length-1][1].length&&(f=b-e[e.length-1][1].length,e[e.length-1][1]+=d.substring(0,f),c.length1+=f,c.length2+=f);return d}; diff_match_patch.prototype.patch_splitMax=function(a){for(var b=this.Match_MaxBits,d=0;d} diffs Array of diff tuples. + */ +diff_match_patch.prototype.diff_cleanupSplitSurrogates = function(diffs) { + var lastEnd; + for (var x = 0; x < diffs.length; x++) { + var thisDiff = diffs[x]; + var thisTop = thisDiff[1][0]; + var thisEnd = thisDiff[1][thisDiff[1].length - 1]; + + if (0 === thisDiff[1].length) { + diffs.splice(x--, 1); + continue; + } + + if (thisEnd && this.isHighSurrogate(thisEnd)) { + lastEnd = thisEnd; + thisDiff[1] = thisDiff[1].slice(0, -1); + } + + if (lastEnd && thisTop && this.isHighSurrogate(lastEnd) && this.isLowSurrogate(thisTop)) { + thisDiff[1] = lastEnd + thisDiff[1]; + } + + if (0 === thisDiff[1].length) { + diffs.splice(x--, 1); + continue; + } + } + + return diffs; +}; + +diff_match_patch.prototype.isHighSurrogate = function(c) { + var v = c.charCodeAt(0); + return v >= 0xD800 && v <= 0xDBFF; +}; + +diff_match_patch.prototype.isLowSurrogate = function(c) { + var v = c.charCodeAt(0); + return v >= 0xDC00 && v <= 0xDFFF; +}; + +diff_match_patch.prototype.digit16 = function(c) { + switch (c) { + case '0': return 0; + case '1': return 1; + case '2': return 2; + case '3': return 3; + case '4': return 4; + case '5': return 5; + case '6': return 6; + case '7': return 7; + case '8': return 8; + case '9': return 9; + case 'A': case 'a': return 10; + case 'B': case 'b': return 11; + case 'C': case 'c': return 12; + case 'D': case 'd': return 13; + case 'E': case 'e': return 14; + case 'F': case 'f': return 15; + default: throw new Error('Invalid hex-code'); + } +}; + +/** + * Decode URI-encoded string but allow for encoded surrogate halves + * + * diff_match_patch needs this relaxation of the requirements because + * not all libraries and versions produce valid URI strings in toDelta + * and we don't want to crash this code when the input is valid input + * but at the same time invalid utf-8 + * + * @example: decodeURI( 'abcd%3A %F0%9F%85%B0' ) = 'abcd: \ud83c\udd70' + * @example: decodeURI( 'abcd%3A %ED%A0%BC' ) = 'abcd: \ud83c' + * + * @cite: @mathiasbynens utf8.js at https://github.com/mathiasbynens/utf8.js + * + * @param {String} text input string encoded by encodeURI() or equivalent + * @return {String} + */ +diff_match_patch.prototype.decodeURI = function(text) { + try { + return decodeURI(text); + } catch ( e ) { + var i = 0; + var decoded = ''; + + while (i < text.length) { + if ( text[i] !== '%' ) { + decoded += text[i++]; + continue; + } + + // start a percent-sequence + var byte1 = (this.digit16(text[i + 1]) << 4) + this.digit16(text[i + 2]); + if ((byte1 & 0x80) === 0) { + decoded += String.fromCharCode(byte1); + i += 3; + continue; + } + + if ('%' !== text[i + 3]) { + throw new URIError('URI malformed'); + } + + var byte2 = (this.digit16(text[i + 4]) << 4) + this.digit16(text[i + 5]); + if ((byte2 & 0xC0) !== 0x80) { + throw new URIError('URI malformed'); + } + byte2 = byte2 & 0x3F; + if ((byte1 & 0xE0) === 0xC0) { + decoded += String.fromCharCode(((byte1 & 0x1F) << 6) | byte2); + i += 6; + continue; + } + + if ('%' !== text[i + 6]) { + throw new URIError('URI malformed'); + } + + var byte3 = (this.digit16(text[i + 7]) << 4) + this.digit16(text[i + 8]); + if ((byte3 & 0xC0) !== 0x80) { + throw new URIError('URI malformed'); + } + byte3 = byte3 & 0x3F; + if ((byte1 & 0xF0) === 0xE0) { + // unpaired surrogate are fine here + decoded += String.fromCharCode(((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3); + i += 9; + continue; + } + + if ('%' !== text[i + 9]) { + throw new URIError('URI malformed'); + } + + var byte4 = (this.digit16(text[i + 10]) << 4) + this.digit16(text[i + 11]); + if ((byte4 & 0xC0) !== 0x80) { + throw new URIError('URI malformed'); + } + byte4 = byte4 & 0x3F; + if ((byte1 & 0xF8) === 0xF0) { + var codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) | (byte3 << 0x06) | byte4; + if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) { + decoded += String.fromCharCode((codePoint & 0xFFFF) >>> 10 & 0x3FF | 0xD800); + decoded += String.fromCharCode(0xDC00 | (codePoint & 0xFFFF) & 0x3FF); + i += 12; + continue; + } + } + + throw new URIError('URI malformed'); + } + + return decoded; + } +}; /** * loc is a location in text1, compute and return the equivalent location in @@ -1249,6 +1409,7 @@ diff_match_patch.prototype.diff_xIndex = function(diffs, loc) { * @return {string} HTML representation. */ diff_match_patch.prototype.diff_prettyHtml = function(diffs) { + diffs = this.diff_cleanupSplitSurrogates(diffs); var html = []; var pattern_amp = /&/g; var pattern_lt = /= 0xD800 && v <= 0xDBFF; -} - -diff_match_patch.prototype.isLowSurrogate = function(c) { - var v = c.charCodeAt(0); - return v >= 0xDC00 && v <= 0xDFFF; -} /** * Crush the diff into an encoded string which describes the operations @@ -1358,162 +1510,24 @@ diff_match_patch.prototype.isLowSurrogate = function(c) { * @return {string} Delta text. */ diff_match_patch.prototype.diff_toDelta = function(diffs) { + diffs = this.diff_cleanupSplitSurrogates(diffs); var text = []; - var lastEnd; for (var x = 0; x < diffs.length; x++) { - var thisDiff = diffs[x]; - var thisTop = thisDiff[1][0]; - var thisEnd = thisDiff[1][thisDiff[1].length - 1]; - - if (0 === thisDiff[1].length) { - continue; - } - - // trap a trailing high-surrogate so we can - // distribute it to the successive edits - if (thisEnd && this.isHighSurrogate(thisEnd)) { - lastEnd = thisEnd; - thisDiff[1] = thisDiff[1].slice(0, -1); - } - - if (lastEnd && thisTop && this.isHighSurrogate(lastEnd) && this.isLowSurrogate(thisTop)) { - thisDiff[1] = lastEnd + thisDiff[1]; - } - - if (0 === thisDiff[1].length) { - continue; - } - - switch (thisDiff[0]) { + switch (diffs[x][0]) { case DIFF_INSERT: - text.push('+' + encodeURI(thisDiff[1])); + text[x] = '+' + encodeURI(diffs[x][1]); break; case DIFF_DELETE: - text.push('-' + thisDiff[1].length); + text[x] = '-' + diffs[x][1].length; break; case DIFF_EQUAL: - text.push('=' + thisDiff[1].length); + text[x] = '=' + diffs[x][1].length; break; } } return text.join('\t').replace(/%20/g, ' '); }; -diff_match_patch.prototype.digit16 = function(c) { - switch (c) { - case '0': return 0; - case '1': return 1; - case '2': return 2; - case '3': return 3; - case '4': return 4; - case '5': return 5; - case '6': return 6; - case '7': return 7; - case '8': return 8; - case '9': return 9; - case 'A': case 'a': return 10; - case 'B': case 'b': return 11; - case 'C': case 'c': return 12; - case 'D': case 'd': return 13; - case 'E': case 'e': return 14; - case 'F': case 'f': return 15; - default: throw new Error('Invalid hex-code'); - } -}; - -/** - * Decode URI-encoded string but allow for encoded surrogate halves - * - * diff_match_patch needs this relaxation of the requirements because - * not all libraries and versions produce valid URI strings in toDelta - * and we don't want to crash this code when the input is valid input - * but at the same time invalid utf-8 - * - * @example: decodeURI( 'abcd%3A %F0%9F%85%B0' ) = 'abcd: \ud83c\udd70' - * @example: decodeURI( 'abcd%3A %ED%A0%BC' ) = 'abcd: \ud83c' - * - * @cite: @mathiasbynens utf8.js at https://github.com/mathiasbynens/utf8.js - * - * @param {String} text input string encoded by encodeURI() or equivalent - * @return {String} - */ -diff_match_patch.prototype.decodeURI = function(text) { - try { - return decodeURI(text); - } catch ( e ) { - var i = 0; - var decoded = ''; - - while (i < text.length) { - if ( text[i] !== '%' ) { - decoded += text[i++]; - continue; - } - - // start a percent-sequence - var byte1 = (this.digit16(text[i + 1]) << 4) + this.digit16(text[i + 2]); - if ((byte1 & 0x80) === 0) { - decoded += String.fromCharCode(byte1); - i += 3; - continue; - } - - if ('%' !== text[i + 3]) { - throw new URIError('URI malformed'); - } - - var byte2 = (this.digit16(text[i + 4]) << 4) + this.digit16(text[i + 5]); - if ((byte2 & 0xC0) !== 0x80) { - throw new URIError('URI malformed'); - } - byte2 = byte2 & 0x3F; - if ((byte1 & 0xE0) === 0xC0) { - decoded += String.fromCharCode(((byte1 & 0x1F) << 6) | byte2); - i += 6; - continue; - } - - if ('%' !== text[i + 6]) { - throw new URIError('URI malformed'); - } - - var byte3 = (this.digit16(text[i + 7]) << 4) + this.digit16(text[i + 8]); - if ((byte3 & 0xC0) !== 0x80) { - throw new URIError('URI malformed'); - } - byte3 = byte3 & 0x3F; - if ((byte1 & 0xF0) === 0xE0) { - // unpaired surrogate are fine here - decoded += String.fromCharCode(((byte1 & 0x0F) << 12) | (byte2 << 6) | byte3); - i += 9; - continue; - } - - if ('%' !== text[i + 9]) { - throw new URIError('URI malformed'); - } - - var byte4 = (this.digit16(text[i + 10]) << 4) + this.digit16(text[i + 11]); - if ((byte4 & 0xC0) !== 0x80) { - throw new URIError('URI malformed'); - } - byte4 = byte4 & 0x3F; - if ((byte1 & 0xF8) === 0xF0) { - var codePoint = ((byte1 & 0x07) << 0x12) | (byte2 << 0x0C) | (byte3 << 0x06) | byte4; - if (codePoint >= 0x010000 && codePoint <= 0x10FFFF) { - decoded += String.fromCharCode((codePoint & 0xFFFF) >>> 10 & 0x3FF | 0xD800); - decoded += String.fromCharCode(0xDC00 | (codePoint & 0xFFFF) & 0x3FF); - i += 12; - continue; - } - } - - throw new URIError('URI malformed'); - } - - return decoded; - } -}; /** * Given the original text1, and an encoded string which describes the @@ -1775,11 +1789,23 @@ diff_match_patch.prototype.patch_addContext_ = function(patch, text) { padding += this.Patch_Margin; // Add the prefix. + if ( + patch.start2 - padding > 0 && + diff_match_patch.prototype.isLowSurrogate(text[patch.start2 - padding]) + ) { + padding++; + } var prefix = text.substring(patch.start2 - padding, patch.start2); if (prefix) { patch.diffs.unshift(new diff_match_patch.Diff(DIFF_EQUAL, prefix)); } // Add the suffix. + if ( + patch.start2 + patch.length1 + padding < text.length && + diff_match_patch.prototype.isHighSurrogate(text[patch.start2 + patch.length1 + padding]) + ) { + padding++; + } var suffix = text.substring(patch.start2 + patch.length1, patch.start2 + patch.length1 + padding); if (suffix) { @@ -1853,6 +1879,7 @@ diff_match_patch.prototype.patch_make = function(a, opt_b, opt_c) { if (diffs.length === 0) { return []; // Get rid of the null case. } + diffs = this.diff_cleanupSplitSurrogates(diffs); var patches = []; var patch = new diff_match_patch.patch_obj(); var patchDiffLength = 0; // Keeping our own length var is faster in JS. @@ -2350,6 +2377,7 @@ diff_match_patch.patch_obj.prototype.toString = function() { var text = ['@@ -' + coords1 + ' +' + coords2 + ' @@\n']; var op; // Escape the body of the patch with %xx notation. + diff_match_patch.prototype.diff_cleanupSplitSurrogates(this.diffs); for (var x = 0; x < this.diffs.length; x++) { switch (this.diffs[x][0]) { case DIFF_INSERT: