diff options
Diffstat (limited to 'bundles/ie.wombat.jbdiff/src/ie/wombat/jbdiff/JBDiff.java')
-rw-r--r-- | bundles/ie.wombat.jbdiff/src/ie/wombat/jbdiff/JBDiff.java | 612 |
1 files changed, 0 insertions, 612 deletions
diff --git a/bundles/ie.wombat.jbdiff/src/ie/wombat/jbdiff/JBDiff.java b/bundles/ie.wombat.jbdiff/src/ie/wombat/jbdiff/JBDiff.java deleted file mode 100644 index 711fbd425..000000000 --- a/bundles/ie.wombat.jbdiff/src/ie/wombat/jbdiff/JBDiff.java +++ /dev/null @@ -1,612 +0,0 @@ -/* - * Copyright (c) 2005, Joe Desbonnet, (jdesbonnet@gmail.com) - * All rights reserved. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * * Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * * Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * * Neither the name of the <organization> nor the - * names of its contributors may be used to endorse or promote products - * derived from this software without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY <copyright holder> ``AS IS'' AND ANY - * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED - * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE - * DISCLAIMED. IN NO EVENT SHALL <copyright holder> BE LIABLE FOR ANY - * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES - * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; - * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND - * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT - * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ -package ie.wombat.jbdiff; - -import java.io.BufferedInputStream; -import java.io.ByteArrayOutputStream; -import java.io.DataOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileOutputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.OutputStream; -import java.util.zip.GZIPOutputStream; - -/** - * Java Binary Diff utility. Based on bsdiff (v4.2) by Colin Percival (see - * http://www.daemonology.net/bsdiff/ ) and distributed under BSD license. - * - * <p> - * Running this on large files will probably require an increase of the default - * maximum heap size (use java -Xmx200m) - * </p> - * - * @author Joe Desbonnet, joe@galway.net - * - */ -public class JBDiff { - - // JBDiff extensions by Stefan.Liebig@compeople.de: - // - // - uses GZIP compressor to compress ALL of the blocks (ctrl,diff,extra). - // - added interfaces that allows using of JBDiff with streams and byte - // arrays. - -// private static final String VERSION = "jbdiff-0.1.0.1"; - - // This is ´jbdiff40´. - private static final byte[] MAGIC_BYTES = new byte[] { 0x6a, 0x62, 0x64, - 0x69, 0x66, 0x66, 0x34, 0x30 }; - - private final static void split(int[] I, int[] V, int start, int len, int h) { - - int i, j, k, x, tmp, jj, kk; - - if (len < 16) { - for (k = start; k < start + len; k += j) { - j = 1; - x = V[I[k] + h]; - for (i = 1; k + i < start + len; i++) { - if (V[I[k + i] + h] < x) { - x = V[I[k + i] + h]; - j = 0; - } - - if (V[I[k + i] + h] == x) { - tmp = I[k + j]; - I[k + j] = I[k + i]; - I[k + i] = tmp; - j++; - } - - } - - for (i = 0; i < j; i++) { - V[I[k + i]] = k + j - 1; - } - if (j == 1) { - I[k] = -1; - } - } - - return; - } - - x = V[I[start + len / 2] + h]; - jj = 0; - kk = 0; - for (i = start; i < start + len; i++) { - if (V[I[i] + h] < x) { - jj++; - } - if (V[I[i] + h] == x) { - kk++; - } - } - - jj += start; - kk += jj; - - i = start; - j = 0; - k = 0; - while (i < jj) { - if (V[I[i] + h] < x) { - i++; - } else if (V[I[i] + h] == x) { - tmp = I[i]; - I[i] = I[jj + j]; - I[jj + j] = tmp; - j++; - } else { - tmp = I[i]; - I[i] = I[kk + k]; - I[kk + k] = tmp; - k++; - } - - } - - while (jj + j < kk) { - if (V[I[jj + j] + h] == x) { - j++; - } else { - tmp = I[jj + j]; - I[jj + j] = I[kk + k]; - I[kk + k] = tmp; - k++; - } - - } - - if (jj > start) { - split(I, V, start, jj - start, h); - } - - for (i = 0; i < kk - jj; i++) { - V[I[jj + i]] = kk - 1; - } - - if (jj == kk - 1) { - I[jj] = -1; - } - - if (start + len > kk) { - split(I, V, kk, start + len - kk, h); - } - - } - - /** - * Fast suffix sporting. Larsson and Sadakane's qsufsort algorithm. See - * http://www.cs.lth.se/Research/Algorithms/Papers/jesper5.ps - * - * @param I - * @param V - * @param oldBuf - * @param oldsize - */ - private static void qsufsort(int[] I, int[] V, byte[] oldBuf, int oldsize) { - - // int oldsize = oldBuf.length; - int[] buckets = new int[256]; - - // No need to do that in Java. - // for ( int i = 0; i < 256; i++ ) { - // buckets[i] = 0; - // } - - for (int i = 0; i < oldsize; i++) { - buckets[oldBuf[i] & 0xff]++; - } - - for (int i = 1; i < 256; i++) { - buckets[i] += buckets[i - 1]; - } - - for (int i = 255; i > 0; i--) { - buckets[i] = buckets[i - 1]; - } - - buckets[0] = 0; - - for (int i = 0; i < oldsize; i++) { - I[++buckets[oldBuf[i] & 0xff]] = i; - } - - I[0] = oldsize; - for (int i = 0; i < oldsize; i++) { - V[i] = buckets[oldBuf[i] & 0xff]; - } - V[oldsize] = 0; - - for (int i = 1; i < 256; i++) { - if (buckets[i] == buckets[i - 1] + 1) { - I[buckets[i]] = -1; - } - } - - I[0] = -1; - - for (int h = 1; I[0] != -(oldsize + 1); h += h) { - int len = 0; - int i; - for (i = 0; i < oldsize + 1;) { - if (I[i] < 0) { - len -= I[i]; - i -= I[i]; - } else { - // if(len) I[i-len]=-len; - if (len != 0) { - I[i - len] = -len; - } - len = V[I[i]] + 1 - i; - split(I, V, i, len, h); - i += len; - len = 0; - } - - } - - if (len != 0) { - I[i - len] = -len; - } - } - - for (int i = 0; i < oldsize + 1; i++) { - I[V[i]] = i; - } - } - - /** - * Count the number of bytes that match in oldBuf (starting at offset - * oldOffset) and newBuf (starting at offset newOffset). - * - * @param oldBuf - * @param oldOffset - * @param newBuf - * @param newOffset - * @return - */ - private final static int matchlen(byte[] oldBuf, int oldSize, - int oldOffset, byte[] newBuf, int newSize, int newOffset) { - // int end = Math - // .min(oldBuf.length - oldOffset, newBuf.length - newOffset); - int end = Math.min(oldSize - oldOffset, newSize - newOffset); - for (int i = 0; i < end; i++) { - if (oldBuf[oldOffset + i] != newBuf[newOffset + i]) { - return i; - } - } - return end; - } - - private final static int search(int[] I, byte[] oldBuf, int oldSize, - byte[] newBuf, int newSize, int newBufOffset, int start, int end, - IntByRef pos) { - - if (end - start < 2) { - int x = matchlen(oldBuf, oldSize, I[start], newBuf, newSize, - newBufOffset); - int y = matchlen(oldBuf, oldSize, I[end], newBuf, newSize, - newBufOffset); - - if (x > y) { - pos.value = I[start]; - return x; - } else { - pos.value = I[end]; - return y; - } - } - - int x = start + (end - start) / 2; - if (Util.memcmp(oldBuf, oldSize, I[x], newBuf, newSize, newBufOffset) < 0) { - return search(I, oldBuf, oldSize, newBuf, newSize, newBufOffset, x, - end, pos); - } else { - return search(I, oldBuf, oldSize, newBuf, newSize, newBufOffset, - start, x, pos); - } - - } - - /** - * @param oldFile - * @param newFile - * @param diffFile - * @throws IOException - */ - public static void bsdiff(File oldFile, File newFile, File diffFile) - throws IOException { - InputStream oldInputStream = new BufferedInputStream( - new FileInputStream(oldFile)); - InputStream newInputStream = new BufferedInputStream( - new FileInputStream(newFile)); - OutputStream diffOutputStream = new FileOutputStream(diffFile); - - byte[] diffBytes = bsdiff(oldInputStream, (int) oldFile.length(), - newInputStream, (int) newFile.length()); - - diffOutputStream.write(diffBytes); - diffOutputStream.close(); - } - - /** - * @param oldInputStream - * @param oldsize - * @param newInputStream - * @param newsize - * @return - * @throws IOException - */ - public static byte[] bsdiff(InputStream oldInputStream, int oldsize, - InputStream newInputStream, int newsize) throws IOException { - - byte[] oldBuf = new byte[oldsize]; - - Util.readFromStream(oldInputStream, oldBuf, 0, oldsize); - oldInputStream.close(); - - byte[] newBuf = new byte[newsize]; - Util.readFromStream(newInputStream, newBuf, 0, newsize); - newInputStream.close(); - - return bsdiff(oldBuf, oldsize, newBuf, newsize); - } - - /** - * @param oldBuf - * @param oldsize - * @param newBuf - * @param newsize - * @return - * @throws IOException - */ - public static byte[] bsdiff(byte[] oldBuf, int oldsize, byte[] newBuf, - int newsize) throws IOException { - - int[] I = new int[oldsize + 1]; - qsufsort(I, new int[oldsize + 1], oldBuf, oldsize); - - // diff block - int dblen = 0; - byte[] db = new byte[newsize]; - - // extra block - int eblen = 0; - byte[] eb = new byte[newsize]; - - /* - * Diff file is composed as follows: - * - * Header (32 bytes) Data (from offset 32 to end of file) - * - * Header: Offset 0, length 8 bytes: file magic "jbdiff40" Offset 8, - * length 8 bytes: length of compressed ctrl block Offset 16, length 8 - * bytes: length of compressed diff block Offset 24, length 8 bytes: - * length of new file - * - * Data: 32 (length ctrlBlockLen): ctrlBlock (bzip2) 32+ctrlBlockLen - * (length diffBlockLen): diffBlock (bzip2) 32+ctrlBlockLen+diffBlockLen - * (to end of file): extraBlock (bzip2) - * - * ctrlBlock comprises a set of records, each record 12 bytes. A record - * comprises 3 x 32 bit integers. The ctrlBlock is not compressed. - */ - - ByteArrayOutputStream byteOut = new ByteArrayOutputStream(); - DataOutputStream diffOut = new DataOutputStream(byteOut); - - /* - * Write as much of header as we have now. Size of ctrlBlock and - * diffBlock must be filled in later. - */ - diffOut.write(MAGIC_BYTES); - diffOut.writeLong(-1); // place holder for ctrlBlockLen - diffOut.writeLong(-1); // place holder for diffBlockLen - diffOut.writeLong(newsize); - diffOut.flush(); - - GZIPOutputStream bzip2Out = new GZIPOutputStream(diffOut); - DataOutputStream dataOut = new DataOutputStream(bzip2Out); - - int oldscore, scsc; - - int overlap, Ss, lens; - int i; - int scan = 0; - int len = 0; - int lastscan = 0; - int lastpos = 0; - int lastoffset = 0; - - IntByRef pos = new IntByRef(); - // int ctrlBlockLen = 0; - - while (scan < newsize) { - oldscore = 0; - - for (scsc = scan += len; scan < newsize; scan++) { - - len = search(I, oldBuf, oldsize, newBuf, newsize, scan, 0, - oldsize, pos); - - for (; scsc < scan + len; scsc++) { - if ((scsc + lastoffset < oldsize) - && (oldBuf[scsc + lastoffset] == newBuf[scsc])) { - oldscore++; - } - } - - if (((len == oldscore) && (len != 0)) || (len > oldscore + 8)) { - break; - } - - if ((scan + lastoffset < oldsize) - && (oldBuf[scan + lastoffset] == newBuf[scan])) { - oldscore--; - } - } - - if ((len != oldscore) || (scan == newsize)) { - int s = 0; - int Sf = 0; - int lenf = 0; - for (i = 0; (lastscan + i < scan) && (lastpos + i < oldsize);) { - if (oldBuf[lastpos + i] == newBuf[lastscan + i]) - s++; - i++; - if (s * 2 - i > Sf * 2 - lenf) { - Sf = s; - lenf = i; - } - } - - int lenb = 0; - if (scan < newsize) { - s = 0; - int Sb = 0; - for (i = 1; (scan >= lastscan + i) && (pos.value >= i); i++) { - if (oldBuf[pos.value - i] == newBuf[scan - i]) - s++; - if (s * 2 - i > Sb * 2 - lenb) { - Sb = s; - lenb = i; - } - } - } - - if (lastscan + lenf > scan - lenb) { - overlap = (lastscan + lenf) - (scan - lenb); - s = 0; - Ss = 0; - lens = 0; - for (i = 0; i < overlap; i++) { - if (newBuf[lastscan + lenf - overlap + i] == oldBuf[lastpos - + lenf - overlap + i]) { - s++; - } - if (newBuf[scan - lenb + i] == oldBuf[pos.value - lenb - + i]) { - s--; - } - if (s > Ss) { - Ss = s; - lens = i + 1; - } - } - - lenf += lens - overlap; - lenb -= lens; - } - - // ? byte casting introduced here -- might affect things - for (i = 0; i < lenf; i++) { - db[dblen + i] = (byte) (newBuf[lastscan + i] - oldBuf[lastpos - + i]); - } - - for (i = 0; i < (scan - lenb) - (lastscan + lenf); i++) { - eb[eblen + i] = newBuf[lastscan + lenf + i]; - } - - dblen += lenf; - eblen += (scan - lenb) - (lastscan + lenf); - - /* - * Write control block entry (3 x int) - */ - // diffOut.writeInt( lenf ); - // diffOut.writeInt( ( scan - lenb ) - ( lastscan + lenf ) ); - // diffOut.writeInt( ( pos[0] - lenb ) - ( lastpos + lenf ) ); - // ctrlBlockLen += 12; - dataOut.writeInt(lenf); - dataOut.writeInt((scan - lenb) - (lastscan + lenf)); - dataOut.writeInt((pos.value - lenb) - (lastpos + lenf)); - - lastscan = scan - lenb; - lastpos = pos.value - lenb; - lastoffset = pos.value - scan; - } // end if - } // end while loop - - dataOut.flush(); - bzip2Out.finish(); - - // now compressed ctrlBlockLen - int ctrlBlockLen = diffOut.size() - Util.HEADER_SIZE; - // System.err.println( "Diff: ctrlBlockLen=" + ctrlBlockLen ); - - // GZIPOutputStream gzOut; - - /* - * Write diff block - */ - // gzOut = new GZIPOutputStream( diffOut ); - bzip2Out = new GZIPOutputStream(diffOut); - bzip2Out.write(db, 0, dblen); - bzip2Out.finish(); - bzip2Out.flush(); - int diffBlockLen = diffOut.size() - ctrlBlockLen - Util.HEADER_SIZE; - // System.err.println( "Diff: diffBlockLen=" + diffBlockLen ); - - /* - * Write extra block - */ - // gzOut = new GZIPOutputStream( diffOut ); - bzip2Out = new GZIPOutputStream(diffOut); - bzip2Out.write(eb, 0, eblen); - bzip2Out.finish(); - bzip2Out.flush(); - // long extraBlockLen = diffOut.size() - diffBlockLen - ctrlBlockLen - - // HEADER_SIZE; - // System.err.println( "Diff: extraBlockLen=" + extraBlockLen ); - - diffOut.close(); - - /* - * Write missing header info. - */ - ByteArrayOutputStream byteHeaderOut = new ByteArrayOutputStream( - Util.HEADER_SIZE); - DataOutputStream headerOut = new DataOutputStream(byteHeaderOut); - headerOut.write(MAGIC_BYTES); - headerOut.writeLong(ctrlBlockLen); // place holder for ctrlBlockLen - headerOut.writeLong(diffBlockLen); // place holder for diffBlockLen - headerOut.writeLong(newsize); - headerOut.close(); - - // Copy header information into the diff - byte[] diffBytes = byteOut.toByteArray(); - byte[] headerBytes = byteHeaderOut.toByteArray(); - - System.arraycopy(headerBytes, 0, diffBytes, 0, headerBytes.length); - - return diffBytes; - // /* - // * Write missing header info. Need to reopen the file with - // RandomAccessFile - // * for this. - // */ - // RandomAccessFile diff = new RandomAccessFile( diffFile, "rw" ); - // diff.seek( 8 ); - // diff.writeLong( ctrlBlockLen ); // ctrlBlockLen (compressed) @offset - // 8 - // diff.writeLong( diffBlockLen ); // diffBlockLen (compressed) @offset - // 16 - // diff.close(); - } - - /** - * Run JBDiff from the command line. Params: oldfile newfile difffile. diff - * file will be created. - * - * @param arg - * @throws IOException - */ - public static void main(String[] arg) throws IOException { - - if (arg.length != 3) { - System.err - .println("usage example: java -Xmx200m ie.wombat.jbdiff.JBDiff oldfile newfile patchfile\n"); - return; - } - - File oldFile = new File(arg[0]); - File newFile = new File(arg[1]); - File diffFile = new File(arg[2]); - - bsdiff(oldFile, newFile, diffFile); - - } - - private static class IntByRef { - private int value; - } -} |