// /* Name: wDiff.js Version: 0.9.9 (October 10, 2010) Info: http://en.wikipedia.org/wiki/User:Cacycle/diff Code: http://en.wikipedia.org/wiki/User:Cacycle/diff.js JavaScript diff algorithm by [[en:User:Cacycle]] (http://en.wikipedia.org/wiki/User_talk:Cacycle). Outputs html/css-formatted new text with highlighted deletions, inserts, and block moves. For newline highlighting the following style rules have to be added to the document: .wDiffParagraph:before { content: "¶"; }; The program uses cross-browser code and should work with all modern browsers. It has been tested with: * Mozilla Firefox 1.5.0.1 * Mozilla SeaMonkey 1.0 * Opera 8.53 * Internet Explorer 6.0.2900.2180 * Internet Explorer 7.0.5730.11 This program is also compatible with Greasemonkey An implementation of the word-based algorithm from: Communications of the ACM 21(4):264 (1978) http://doi.acm.org/10.1145/359460.359467 With the following additional feature: * Word types have been optimized for MediaWiki source texts * Additional post-pass 5 code for resolving islands caused by adding two common words at the end of sequences of common words * Additional detection of block borders and color coding of moved blocks and their original position * Optional "intelligent" omission of unchanged parts from the output This code is used by the MediaWiki in-browser text editors [[en:User:Cacycle/editor]] and [[en:User:Cacycle/wikEd]] and the enhanced diff view tool wikEdDiff [[en:User:Cacycle/wikEd]]. Usage: var htmlText = WDiffString(oldText, newText); This code has been released into the public domain. Datastructures (abbreviations from publication): text: an object that holds all text related datastructures .newWords: consecutive words of the new text (N) .oldWords: consecutive words of the old text (O) .newToOld: array pointing to corresponding word number in old text (NA) .oldToNew: array pointing to corresponding word number in new text (OA) .message: output message for testing purposes symbol table: symbols[word]: associative array (object) of detected words for passes 1 - 3, points to symbol[i] symbol[i]: array of objects that hold word counters and pointers: .newCtr: new word occurences counter (NC) .oldCtr: old word occurences counter (OC) .toNew: first word occurrence in new text, points to text.newWords[i] .toOld: last word occurrence in old text, points to text.oldWords[i] block: an object that holds block move information blocks indexed after new text: .newStart: new text word number of start of this block .newLength: element number of this block including non-words .newWords: true word number of this block .newNumber: corresponding block index in old text .newBlock: moved-block-number of a block that has been moved here .newLeft: moved-block-number of a block that has been moved from this border leftwards .newRight: moved-block-number of a block that has been moved from this border rightwards .newLeftIndex: index number of a block that has been moved from this border leftwards .newRightIndex: index number of a block that has been moved from this border rightwards blocks indexed after old text: .oldStart: word number of start of this block .oldToNew: corresponding new text word number of start .oldLength: element number of this block including non-words .oldWords: true word number of this block */ // css for change indicators if (typeof(wDiffStyleDelete) == 'undefined') { wDiffStyleDelete = 'font-weight: normal; text-decoration: none; color: #fff; background-color: #990033;'; } if (typeof(wDiffStyleInsert) == 'undefined') { wDiffStyleInsert = 'font-weight: normal; text-decoration: none; color: #fff; background-color: #009933;'; } if (typeof(wDiffStyleMoved) == 'undefined') { wDiffStyleMoved = 'font-weight: bold; color: #000; vertical-align: text-bottom; font-size: xx-small; padding: 0; border: solid 1px;'; } if (typeof(wDiffStyleBlock) == 'undefined') { wDiffStyleBlock = [ 'color: #000; background-color: #ffff80;', 'color: #000; background-color: #c0ffff;', 'color: #000; background-color: #ffd0f0;', 'color: #000; background-color: #ffe080;', 'color: #000; background-color: #aaddff;', 'color: #000; background-color: #ddaaff;', 'color: #000; background-color: #ffbbbb;', 'color: #000; background-color: #d8ffa0;', 'color: #000; background-color: #d0d0d0;' ]; } // html for change indicators, {number} is replaced by the block number // {block} is replaced by the block style, class and html comments are important for shortening the output if (typeof(wDiffHtmlMovedRight) == 'undefined') { wDiffHtmlMovedRight = ''; } if (typeof(wDiffHtmlMovedLeft) == 'undefined') { wDiffHtmlMovedLeft = ''; } if (typeof(wDiffHtmlBlockStart) == 'undefined') { wDiffHtmlBlockStart = ''; } if (typeof(wDiffHtmlBlockEnd) == 'undefined') { wDiffHtmlBlockEnd = ''; } if (typeof(wDiffHtmlDeleteStart) == 'undefined') { wDiffHtmlDeleteStart = ''; } if (typeof(wDiffHtmlDeleteEnd) == 'undefined') { wDiffHtmlDeleteEnd = ''; } if (typeof(wDiffHtmlInsertStart) == 'undefined') { wDiffHtmlInsertStart = ''; } if (typeof(wDiffHtmlInsertEnd) == 'undefined') { wDiffHtmlInsertEnd = ''; } // minimal number of real words for a moved block (0 for always displaying block move indicators) if (typeof(wDiffBlockMinLength) == 'undefined') { wDiffBlockMinLength = 3; } // exclude identical sequence starts and endings from change marking if (typeof(wDiffWordDiff) == 'undefined') { wDiffWordDiff = true; } // enable recursive diff to resolve problematic sequences if (typeof(wDiffRecursiveDiff) == 'undefined') { wDiffRecursiveDiff = true; } // enable block move display if (typeof(wDiffShowBlockMoves) == 'undefined') { wDiffShowBlockMoves = true; } // remove unchanged parts from final output // characters before diff tag to search for previous heading, paragraph, line break, cut characters if (typeof(wDiffHeadingBefore) == 'undefined') { wDiffHeadingBefore = 1500; } if (typeof(wDiffParagraphBefore) == 'undefined') { wDiffParagraphBefore = 1500; } if (typeof(wDiffLineBeforeMax) == 'undefined') { wDiffLineBeforeMax = 1000; } if (typeof(wDiffLineBeforeMin) == 'undefined') { wDiffLineBeforeMin = 500; } if (typeof(wDiffBlankBeforeMax) == 'undefined') { wDiffBlankBeforeMax = 1000; } if (typeof(wDiffBlankBeforeMin) == 'undefined') { wDiffBlankBeforeMin = 500; } if (typeof(wDiffCharsBefore) == 'undefined') { wDiffCharsBefore = 500; } // characters after diff tag to search for next heading, paragraph, line break, or characters if (typeof(wDiffHeadingAfter) == 'undefined') { wDiffHeadingAfter = 1500; } if (typeof(wDiffParagraphAfter) == 'undefined') { wDiffParagraphAfter = 1500; } if (typeof(wDiffLineAfterMax) == 'undefined') { wDiffLineAfterMax = 1000; } if (typeof(wDiffLineAfterMin) == 'undefined') { wDiffLineAfterMin = 500; } if (typeof(wDiffBlankAfterMax) == 'undefined') { wDiffBlankAfterMax = 1000; } if (typeof(wDiffBlankAfterMin) == 'undefined') { wDiffBlankAfterMin = 500; } if (typeof(wDiffCharsAfter) == 'undefined') { wDiffCharsAfter = 500; } // maximal fragment distance to join close fragments if (typeof(wDiffFragmentJoin) == 'undefined') { wDiffFragmentJoin = 1000; } if (typeof(wDiffOmittedChars) == 'undefined') { wDiffOmittedChars = '…'; } if (typeof(wDiffOmittedLines) == 'undefined') { wDiffOmittedLines = '
'; } if (typeof(wDiffNoChange) == 'undefined') { wDiffNoChange = '
'; } // compatibility fix for old name of main function //StringDiff = WDiffString; // WDiffString: main program // input: oldText, newText, strings containing the texts // returns: html diff WDiffString = function(oldText, newText) { // IE / Mac fix oldText = oldText.replace(/\r\n?/g, '\n'); newText = newText.replace(/\r\n?/g, '\n'); var text = {}; text.newWords = []; text.oldWords = []; text.newToOld = []; text.oldToNew = []; text.message = ''; var block = {}; var outText = ''; // trap trivial changes: no change if (oldText == newText) { outText = newText; //outText = WDiffEscape(outText); //outText = WDiffHtmlFormat(outText); return(outText); } // trap trivial changes: old text deleted if ( (oldText == null) || (oldText.length == 0) ) { outText = newText; outText = WDiffEscape(outText); outText = WDiffHtmlFormat(outText); outText = wDiffHtmlInsertStart + outText + wDiffHtmlInsertEnd; return(outText); } // trap trivial changes: new text deleted if ( (newText == null) || (newText.length == 0) ) { outText = oldText; outText = WDiffEscape(outText); outText = WDiffHtmlFormat(outText); outText = wDiffHtmlDeleteStart + outText + wDiffHtmlDeleteEnd; return(outText); } // split new and old text into words WDiffSplitText(oldText, newText, text); // calculate diff information WDiffText(text); //detect block borders and moved blocks WDiffDetectBlocks(text, block); // process diff data into formatted html text outText = WDiffToHtml(text, block); // IE fix outText = outText.replace(/> ( *) $1<'); return(outText); }; // WDiffSplitText: split new and old text into words // input: oldText, newText, strings containing the texts // changes: text.newWords and text.oldWords, arrays containing the texts in arrays of words WDiffSplitText = function(oldText, newText, text) { // convert strange spaces oldText = oldText.replace(/[\t\u000b\u00a0\u2028\u2029]+/g, ' '); newText = newText.replace(/[\t\u000b\u00a0\u2028\u2029]+/g, ' '); // split old text into words // / | | | | | | | | | | | | | | / var pattern = /[\w]+|\[\[|\]\]|\{\{|\}\}|\n+| +|&\w+;|'''|''|=+|\{\||\|\}|\|\-|./g; var result; do { result = pattern.exec(oldText); if (result != null) { text.oldWords.push(result[0]); } } while (result != null); // split new text into words do { result = pattern.exec(newText); if (result != null) { text.newWords.push(result[0]); } } while (result != null); return; }; // WDiffText: calculate diff information // input: text.newWords and text.oldWords, arrays containing the texts as arrays of words // optionally for recursive calls: newStart, newEnd, oldStart, oldEnd, recursionLevel // changes: text.newToOld and text.oldToNew, arrays pointing to corresponding words WDiffText = function(text, newStart, newEnd, oldStart, oldEnd, recursionLevel) { var symbol = []; var symbols = {}; // set defaults if (typeof(newStart) == 'undefined') { newStart = 0; } if (typeof(newEnd) == 'undefined') { newEnd = text.newWords.length; } if (typeof(oldStart) == 'undefined') { oldStart = 0; } if (typeof(oldEnd) == 'undefined') { oldEnd = text.oldWords.length; } if (typeof(recursionLevel) == 'undefined') { recursionLevel = 0; } // limit recursion depth if (recursionLevel > 10) { return; } // // pass 1: Parse new text into symbol table // for (var i = newStart; i < newEnd; i ++) { var word = text.newWords[i]; // preserve the native method if (word.indexOf('hasOwnProperty') == 0) { word = word.replace(/^(hasOwnProperty_*)$/, '$1_'); } // add new entry to symbol table if (symbols.hasOwnProperty(word) == false) { var last = symbol.length; symbols[word] = last; symbol[last] = { newCtr: 1, oldCtr: 0, toNew: i, toOld: null }; } // or update existing entry else { // increment word counter for new text var hashToArray = symbols[word]; symbol[hashToArray].newCtr ++; } } // // pass 2: parse old text into symbol table // for (var i = oldStart; i < oldEnd; i ++) { var word = text.oldWords[i]; // preserve the native method if (word.indexOf('hasOwnProperty') == 0) { word = word.replace(/^(hasOwnProperty_*)$/, '$1_'); } // add new entry to symbol table if (symbols.hasOwnProperty(word) == false) { var last = symbol.length; symbols[word] = last; symbol[last] = { newCtr: 0, oldCtr: 1, toNew: null, toOld: i }; } // or update existing entry else { // increment word counter for old text var hashToArray = symbols[word]; symbol[hashToArray].oldCtr ++; // add word number for old text symbol[hashToArray].toOld = i; } } // // pass 3: connect unique words // for (var i = 0; i < symbol.length; i ++) { // find words in the symbol table that occur only once in both versions if ( (symbol[i].newCtr == 1) && (symbol[i].oldCtr == 1) ) { var toNew = symbol[i].toNew; var toOld = symbol[i].toOld; // do not use spaces as unique markers if (/^\s+$/.test(text.newWords[toNew]) == false) { // connect from new to old and from old to new text.newToOld[toNew] = toOld; text.oldToNew[toOld] = toNew; } } } // // pass 4: connect adjacent identical words downwards // for (var i = newStart; i < newEnd - 1; i ++) { // find already connected pairs if (text.newToOld[i] != null) { var j = text.newToOld[i]; // check if the following words are not yet connected if ( (text.newToOld[i + 1] == null) && (text.oldToNew[j + 1] == null) ) { // connect if the following words are the same if (text.newWords[i + 1] == text.oldWords[j + 1]) { text.newToOld[i + 1] = j + 1; text.oldToNew[j + 1] = i + 1; } } } } // // pass 5: connect adjacent identical words upwards // for (var i = newEnd - 1; i > newStart; i --) { // find already connected pairs if (text.newToOld[i] != null) { var j = text.newToOld[i]; // check if the preceeding words are not yet connected if ( (text.newToOld[i - 1] == null) && (text.oldToNew[j - 1] == null) ) { // connect if the preceeding words are the same if ( text.newWords[i - 1] == text.oldWords[j - 1] ) { text.newToOld[i - 1] = j - 1; text.oldToNew[j - 1] = i - 1; } } } } // // "pass" 6: recursively diff still unresolved regions downwards // if (wDiffRecursiveDiff == true) { var i = newStart; var j = oldStart; while (i < newEnd) { if (text.newToOld[i - 1] != null) { j = text.newToOld[i - 1] + 1; } // check for the start of an unresolved sequence if ( (text.newToOld[i] == null) && (text.oldToNew[j] == null) ) { // determine the ends of the sequences var iStart = i; var iEnd = i; while ( (text.newToOld[iEnd] == null) && (iEnd < newEnd) ) { iEnd ++; } var iLength = iEnd - iStart; var jStart = j; var jEnd = j; while ( (text.oldToNew[jEnd] == null) && (jEnd < oldEnd) ) { jEnd ++; } var jLength = jEnd - jStart; // recursively diff the unresolved sequence if ( (iLength > 0) && (jLength > 0) ) { if ( (iLength > 1) || (jLength > 1) ) { if ( (iStart != newStart) || (iEnd != newEnd) || (jStart != oldStart) || (jEnd != oldEnd) ) { WDiffText(text, iStart, iEnd, jStart, jEnd, recursionLevel + 1); } } } i = iEnd; } else { i ++; } } } // // "pass" 7: recursively diff still unresolved regions upwards // if (wDiffRecursiveDiff == true) { var i = newEnd - 1; var j = oldEnd - 1; while (i >= newStart) { if (text.newToOld[i + 1] != null) { j = text.newToOld[i + 1] - 1; } // check for the start of an unresolved sequence if ( (text.newToOld[i] == null) && (text.oldToNew[j] == null) ) { // determine the ends of the sequences var iStart = i; var iEnd = i + 1; while ( (text.newToOld[iStart - 1] == null) && (iStart >= newStart) ) { iStart --; } if (iStart < 0) { iStart = 0; } var iLength = iEnd - iStart; var jStart = j; var jEnd = j + 1; while ( (text.oldToNew[jStart - 1] == null) && (jStart >= oldStart) ) { jStart --; } if (jStart < 0) { jStart = 0; } var jLength = jEnd - jStart; // recursively diff the unresolved sequence if ( (iLength > 0) && (jLength > 0) ) { if ( (iLength > 1) || (jLength > 1) ) { if ( (iStart != newStart) || (iEnd != newEnd) || (jStart != oldStart) || (jEnd != oldEnd) ) { WDiffText(text, iStart, iEnd, jStart, jEnd, recursionLevel + 1); } } } i = iStart - 1; } else { i --; } } } return; }; // WDiffToHtml: process diff data into formatted html text // input: text.newWords and text.oldWords, arrays containing the texts in arrays of words // text.newToOld and text.oldToNew, arrays pointing to corresponding words // block data structure // returns: outText, a html string WDiffToHtml = function(text, block) { var outText = text.message; var blockNumber = 0; var i = 0; var j = 0; var movedAsInsertion; // cycle through the new text do { var movedIndex = []; var movedBlock = []; var movedLeft = []; var blockText = ''; var identText = ''; var delText = ''; var insText = ''; var identStart = ''; // check if a block ends here and finish previous block if (movedAsInsertion != null) { if (movedAsInsertion == false) { identStart += wDiffHtmlBlockEnd; } else { identStart += wDiffHtmlInsertEnd; } movedAsInsertion = null; } // detect block boundary if ( (text.newToOld[i] != j) || (blockNumber == 0 ) ) { if ( ( (text.newToOld[i] != null) || (i >= text.newWords.length) ) && ( (text.oldToNew[j] != null) || (j >= text.oldWords.length) ) ) { // block moved right var moved = block.newRight[blockNumber]; if (moved > 0) { var index = block.newRightIndex[blockNumber]; movedIndex.push(index); movedBlock.push(moved); movedLeft.push(false); } // block moved left moved = block.newLeft[blockNumber]; if (moved > 0) { var index = block.newLeftIndex[blockNumber]; movedIndex.push(index); movedBlock.push(moved); movedLeft.push(true); } // check if a block starts here moved = block.newBlock[blockNumber]; if (moved > 0) { // mark block as inserted text if (block.newWords[blockNumber] < wDiffBlockMinLength) { identStart += wDiffHtmlInsertStart; movedAsInsertion = true; } // mark block by color else { if (moved > wDiffStyleBlock.length) { moved = wDiffStyleBlock.length; } identStart += WDiffHtmlCustomize(wDiffHtmlBlockStart, moved - 1); movedAsInsertion = false; } } if (i >= text.newWords.length) { i ++; } else { j = text.newToOld[i]; blockNumber ++; } } } // get the correct order if moved to the left as well as to the right from here if (movedIndex.length == 2) { if (movedIndex[0] > movedIndex[1]) { movedIndex.reverse(); movedBlock.reverse(); movedLeft.reverse(); } } // handle left and right block moves from this position for (var m = 0; m < movedIndex.length; m ++) { // insert the block as deleted text if (block.newWords[ movedIndex[m] ] < wDiffBlockMinLength) { var movedStart = block.newStart[ movedIndex[m] ]; var movedLength = block.newLength[ movedIndex[m] ]; var str = ''; for (var n = movedStart; n < movedStart + movedLength; n ++) { str += text.newWords[n]; } str = WDiffEscape(str); str = str.replace(/\n/g, '
'); blockText += wDiffHtmlDeleteStart + str + wDiffHtmlDeleteEnd; } // add a placeholder / move direction indicator else { if (movedBlock[m] > wDiffStyleBlock.length) { movedBlock[m] = wDiffStyleBlock.length; } if (movedLeft[m]) { blockText += WDiffHtmlCustomize(wDiffHtmlMovedLeft, movedBlock[m] - 1); } else { blockText += WDiffHtmlCustomize(wDiffHtmlMovedRight, movedBlock[m] - 1); } } } // collect consecutive identical text while ( (i < text.newWords.length) && (j < text.oldWords.length) ) { if ( (text.newToOld[i] == null) || (text.oldToNew[j] == null) ) { break; } if (text.newToOld[i] != j) { break; } identText += text.newWords[i]; i ++; j ++; } // collect consecutive deletions while ( (text.oldToNew[j] == null) && (j < text.oldWords.length) ) { delText += text.oldWords[j]; j ++; } // collect consecutive inserts while ( (text.newToOld[i] == null) && (i < text.newWords.length) ) { insText += text.newWords[i]; i ++; } // remove leading and trailing similarities between delText and ins from highlighting var preText = ''; var postText = ''; if (wDiffWordDiff) { if ( (delText != '') && (insText != '') ) { // remove leading similarities while ( delText.charAt(0) == insText.charAt(0) && (delText != '') && (insText != '') ) { preText = preText + delText.charAt(0); delText = delText.substr(1); insText = insText.substr(1); } // remove trailing similarities while ( delText.charAt(delText.length - 1) == insText.charAt(insText.length - 1) && (delText != '') && (insText != '') ) { postText = delText.charAt(delText.length - 1) + postText; delText = delText.substr(0, delText.length - 1); insText = insText.substr(0, insText.length - 1); } } } // output the identical text, deletions and inserts // moved from here indicator if (blockText != '') { outText += blockText; } // identical text if (identText != '') { outText += identStart + WDiffEscape(identText); } outText += preText; // deleted text if (delText != '') { delText = wDiffHtmlDeleteStart + WDiffEscape(delText) + wDiffHtmlDeleteEnd; delText = delText.replace(/\n/g, '

'); outText += delText; } // inserted text if (insText != '') { insText = wDiffHtmlInsertStart + WDiffEscape(insText) + wDiffHtmlInsertEnd; insText = insText.replace(/\n/g, '

'); outText += insText; } outText += postText; } while (i <= text.newWords.length); outText += '\n'; outText = WDiffHtmlFormat(outText); return(outText); }; // WDiffEscape: replaces html-sensitive characters in output text with character entities WDiffEscape = function(text) { text = text.replace(/&/g, '&'); text = text.replace(//g, '>'); text = text.replace(/"/g, '"'); return(text); }; // HtmlCustomize: customize indicator html: replace {number} with the block number, {block} with the block style WDiffHtmlCustomize = function(text, block) { text = text.replace(/\{number\}/, block); text = text.replace(/\{block\}/, wDiffStyleBlock[block]); return(text); }; // HtmlFormat: replaces newlines and multiple spaces in text with html code WDiffHtmlFormat = function(text) { text = text.replace(/ {2}/g, '  '); text = text.replace(/\n/g, '

'); return(text); }; // WDiffDetectBlocks: detect block borders and moved blocks // input: text object, block object WDiffDetectBlocks = function(text, block) { block.oldStart = []; block.oldToNew = []; block.oldLength = []; block.oldWords = []; block.newStart = []; block.newLength = []; block.newWords = []; block.newNumber = []; block.newBlock = []; block.newLeft = []; block.newRight = []; block.newLeftIndex = []; block.newRightIndex = []; var blockNumber = 0; var wordCounter = 0; var realWordCounter = 0; // get old text block order if (wDiffShowBlockMoves) { var j = 0; var i = 0; do { // detect block boundaries on old text if ( (text.oldToNew[j] != i) || (blockNumber == 0 ) ) { if ( ( (text.oldToNew[j] != null) || (j >= text.oldWords.length) ) && ( (text.newToOld[i] != null) || (i >= text.newWords.length) ) ) { if (blockNumber > 0) { block.oldLength[blockNumber - 1] = wordCounter; block.oldWords[blockNumber - 1] = realWordCounter; wordCounter = 0; realWordCounter = 0; } if (j >= text.oldWords.length) { j ++; } else { i = text.oldToNew[j]; block.oldStart[blockNumber] = j; block.oldToNew[blockNumber] = text.oldToNew[j]; blockNumber ++; } } } // jump over identical pairs while ( (i < text.newWords.length) && (j < text.oldWords.length) ) { if ( (text.newToOld[i] == null) || (text.oldToNew[j] == null) ) { break; } if (text.oldToNew[j] != i) { break; } i ++; j ++; wordCounter ++; if ( /\w/.test( text.newWords[i] ) ) { realWordCounter ++; } } // jump over consecutive deletions while ( (text.oldToNew[j] == null) && (j < text.oldWords.length) ) { j ++; } // jump over consecutive inserts while ( (text.newToOld[i] == null) && (i < text.newWords.length) ) { i ++; } } while (j <= text.oldWords.length); // get the block order in the new text var lastMin; var currMinIndex; lastMin = null; // sort the data by increasing start numbers into new text block info for (var i = 0; i < blockNumber; i ++) { currMin = null; for (var j = 0; j < blockNumber; j ++) { curr = block.oldToNew[j]; if ( (curr > lastMin) || (lastMin == null) ) { if ( (curr < currMin) || (currMin == null) ) { currMin = curr; currMinIndex = j; } } } block.newStart[i] = block.oldToNew[currMinIndex]; block.newLength[i] = block.oldLength[currMinIndex]; block.newWords[i] = block.oldWords[currMinIndex]; block.newNumber[i] = currMinIndex; lastMin = currMin; } // detect not moved blocks for (var i = 0; i < blockNumber; i ++) { if (block.newBlock[i] == null) { if (block.newNumber[i] == i) { block.newBlock[i] = 0; } } } // detect switches of neighbouring blocks for (var i = 0; i < blockNumber - 1; i ++) { if ( (block.newBlock[i] == null) && (block.newBlock[i + 1] == null) ) { if (block.newNumber[i] - block.newNumber[i + 1] == 1) { if ( (block.newNumber[i + 1] - block.newNumber[i + 2] != 1) || (i + 2 >= blockNumber) ) { // the shorter one is declared the moved one if (block.newLength[i] < block.newLength[i + 1]) { block.newBlock[i] = 1; block.newBlock[i + 1] = 0; } else { block.newBlock[i] = 0; block.newBlock[i + 1] = 1; } } } } } // mark all others as moved and number the moved blocks j = 1; for (var i = 0; i < blockNumber; i ++) { if ( (block.newBlock[i] == null) || (block.newBlock[i] == 1) ) { block.newBlock[i] = j++; } } // check if a block has been moved from this block border for (var i = 0; i < blockNumber; i ++) { for (var j = 0; j < blockNumber; j ++) { if (block.newNumber[j] == i) { if (block.newBlock[j] > 0) { // block moved right if (block.newNumber[j] < j) { block.newRight[i] = block.newBlock[j]; block.newRightIndex[i] = j; } // block moved left else { block.newLeft[i + 1] = block.newBlock[j]; block.newLeftIndex[i + 1] = j; } } } } } } return; }; // WDiffShortenOutput: remove unchanged parts from final output // input: the output of WDiffString // returns: the text with removed unchanged passages indicated by (...) WDiffShortenOutput = function(diffText) { // html
to newlines diffText = diffText.replace(/]*>/g, '\n'); // scan for diff html tags var regExpDiff = /<\w+ class="(\w+)"[^>]*>(.|\n)*?/g; var tagStart = []; var tagEnd = []; var i = 0; var found; while ( (found = regExpDiff.exec(diffText)) != null ) { // combine consecutive diff tags if ( (i > 0) && (tagEnd[i - 1] == found.index) ) { tagEnd[i - 1] = found.index + found[0].length; } else { tagStart[i] = found.index; tagEnd[i] = found.index + found[0].length; i ++; } } // no diff tags detected if (tagStart.length == 0) { return(wDiffNoChange); } // define regexps var regExpHeading = /\n=+.+?=+ *\n|\n\{\||\n\|\}/g; var regExpParagraph = /\n\n+/g; var regExpLine = /\n+/g; var regExpBlank = /(<[^>]+>)*\s+/g; // determine fragment border positions around diff tags var rangeStart = []; var rangeEnd = []; var rangeStartType = []; var rangeEndType = []; for (var i = 0; i < tagStart.length; i ++) { var found; // find last heading before diff tag var lastPos = tagStart[i] - wDiffHeadingBefore; if (lastPos < 0) { lastPos = 0; } regExpHeading.lastIndex = lastPos; while ( (found = regExpHeading.exec(diffText)) != null ) { if (found.index > tagStart[i]) { break; } rangeStart[i] = found.index; rangeStartType[i] = 'heading'; } // find last paragraph before diff tag if (rangeStart[i] == null) { lastPos = tagStart[i] - wDiffParagraphBefore; if (lastPos < 0) { lastPos = 0; } regExpParagraph.lastIndex = lastPos; while ( (found = regExpParagraph.exec(diffText)) != null ) { if (found.index > tagStart[i]) { break; } rangeStart[i] = found.index; rangeStartType[i] = 'paragraph'; } } // find line break before diff tag if (rangeStart[i] == null) { lastPos = tagStart[i] - wDiffLineBeforeMax; if (lastPos < 0) { lastPos = 0; } regExpLine.lastIndex = lastPos; while ( (found = regExpLine.exec(diffText)) != null ) { if (found.index > tagStart[i] - wDiffLineBeforeMin) { break; } rangeStart[i] = found.index; rangeStartType[i] = 'line'; } } // find blank before diff tag if (rangeStart[i] == null) { lastPos = tagStart[i] - wDiffBlankBeforeMax; if (lastPos < 0) { lastPos = 0; } regExpBlank.lastIndex = lastPos; while ( (found = regExpBlank.exec(diffText)) != null ) { if (found.index > tagStart[i] - wDiffBlankBeforeMin) { break; } rangeStart[i] = found.index; rangeStartType[i] = 'blank'; } } // fixed number of chars before diff tag if (rangeStart[i] == null) { rangeStart[i] = tagStart[i] - wDiffCharsBefore; rangeStartType[i] = 'chars'; if (rangeStart[i] < 0) { rangeStart[i] = 0; } } // find first heading after diff tag regExpHeading.lastIndex = tagEnd[i]; if ( (found = regExpHeading.exec(diffText)) != null ) { if (found.index < tagEnd[i] + wDiffHeadingAfter) { rangeEnd[i] = found.index + found[0].length; rangeEndType[i] = 'heading'; } } // find first paragraph after diff tag if (rangeEnd[i] == null) { regExpParagraph.lastIndex = tagEnd[i]; if ( (found = regExpParagraph.exec(diffText)) != null ) { if (found.index < tagEnd[i] + wDiffParagraphAfter) { rangeEnd[i] = found.index; rangeEndType[i] = 'paragraph'; } } } // find first line break after diff tag if (rangeEnd[i] == null) { regExpLine.lastIndex = tagEnd[i] + wDiffLineAfterMin; if ( (found = regExpLine.exec(diffText)) != null ) { if (found.index < tagEnd[i] + wDiffLineAfterMax) { rangeEnd[i] = found.index; rangeEndType[i] = 'break'; } } } // find blank after diff tag if (rangeEnd[i] == null) { regExpBlank.lastIndex = tagEnd[i] + wDiffBlankAfterMin; if ( (found = regExpBlank.exec(diffText)) != null ) { if (found.index < tagEnd[i] + wDiffBlankAfterMax) { rangeEnd[i] = found.index; rangeEndType[i] = 'blank'; } } } // fixed number of chars after diff tag if (rangeEnd[i] == null) { rangeEnd[i] = tagEnd[i] + wDiffCharsAfter; if (rangeEnd[i] > diffText.length) { rangeEnd[i] = diffText.length; rangeEndType[i] = 'chars'; } } } // remove overlaps, join close fragments var fragmentStart = []; var fragmentEnd = []; var fragmentStartType = []; var fragmentEndType = []; fragmentStart[0] = rangeStart[0]; fragmentEnd[0] = rangeEnd[0]; fragmentStartType[0] = rangeStartType[0]; fragmentEndType[0] = rangeEndType[0]; var j = 1; for (var i = 1; i < rangeStart.length; i ++) { if (rangeStart[i] > fragmentEnd[j - 1] + wDiffFragmentJoin) { fragmentStart[j] = rangeStart[i]; fragmentEnd[j] = rangeEnd[i]; fragmentStartType[j] = rangeStartType[i]; fragmentEndType[j] = rangeEndType[i]; j ++; } else { fragmentEnd[j - 1] = rangeEnd[i]; fragmentEndType[j - 1] = rangeEndType[i]; } } // assemble the fragments var outText = ''; for (var i = 0; i < fragmentStart.length; i ++) { // get text fragment var fragment = diffText.substring(fragmentStart[i], fragmentEnd[i]); var fragment = fragment.replace(/^\n+|\n+$/g, ''); // add inline marks for omitted chars and words if (fragmentStart[i] > 0) { if (fragmentStartType[i] == 'chars') { fragment = wDiffOmittedChars + fragment; } else if (fragmentStartType[i] == 'blank') { fragment = wDiffOmittedChars + ' ' + fragment; } } if (fragmentEnd[i] < diffText.length) { if (fragmentStartType[i] == 'chars') { fragment = fragment + wDiffOmittedChars; } else if (fragmentStartType[i] == 'blank') { fragment = fragment + ' ' + wDiffOmittedChars; } } // add omitted line separator if (fragmentStart[i] > 0) { outText += wDiffOmittedLines; } // encapsulate span errors outText += '
' + fragment + '
'; } // add trailing omitted line separator if (fragmentEnd[i - 1] < diffText.length) { outText = outText + wDiffOmittedLines; } // remove leading and trailing empty lines outText = outText.replace(/^(
)\n+|\n+(<\/div>)$/g, '$1$2'); // convert to html linebreaks outText = outText.replace(/\n/g, '
'); return(outText); };