Skip to main content
aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to 'bundles/ie.wombat.jbdiff/src/ie/wombat/jbdiff/JBDiff.java')
-rw-r--r--bundles/ie.wombat.jbdiff/src/ie/wombat/jbdiff/JBDiff.java604
1 files changed, 604 insertions, 0 deletions
diff --git a/bundles/ie.wombat.jbdiff/src/ie/wombat/jbdiff/JBDiff.java b/bundles/ie.wombat.jbdiff/src/ie/wombat/jbdiff/JBDiff.java
new file mode 100644
index 000000000..cee25fe65
--- /dev/null
+++ b/bundles/ie.wombat.jbdiff/src/ie/wombat/jbdiff/JBDiff.java
@@ -0,0 +1,604 @@
+/*
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+package ie.wombat.jbdiff;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+import org.apache.tools.bzip2.CBZip2OutputStream;
+
+/**
+ * Java Binary Diff utility. Based on bsdiff (v4.2) by Colin Percival (see
+ * http://www.daemonology.net/bsdiff/ ) and distributed under BSD license.
+ *
+ * <p>
+ * Running this on large files will probably require an increase of the default
+ * maximum heap size (use java -Xmx200m)
+ * </p>
+ *
+ * @author Joe Desbonnet, joe@galway.net
+ *
+ */
+public class JBDiff {
+
+ // JBDiff extensions by Stefan.Liebig@compeople.de:
+ //
+ // - uses an extended version of the org.apache.tools.bzip2 compressor to
+ // compress all of the blocks (ctrl,diff,extra).
+ // - added interfaces that allows using of JBDiff with streams and byte
+ // arrays.
+
+ private static final String VERSION = "jbdiff-0.1.0";
+
+ // This is ´jbdiff40´.
+ private static final byte[] MAGIC_BYTES = new byte[] { 0x6a, 0x62, 0x64,
+ 0x69, 0x66, 0x66, 0x34, 0x30 };
+
+ private final static void split(int[] I, int[] V, int start, int len, int h) {
+
+ int i, j, k, x, tmp, jj, kk;
+
+ if (len < 16) {
+ for (k = start; k < start + len; k += j) {
+ j = 1;
+ x = V[I[k] + h];
+ for (i = 1; k + i < start + len; i++) {
+ if (V[I[k + i] + h] < x) {
+ x = V[I[k + i] + h];
+ j = 0;
+ }
+
+ if (V[I[k + i] + h] == x) {
+ tmp = I[k + j];
+ I[k + j] = I[k + i];
+ I[k + i] = tmp;
+ j++;
+ }
+
+ }
+
+ for (i = 0; i < j; i++) {
+ V[I[k + i]] = k + j - 1;
+ }
+ if (j == 1) {
+ I[k] = -1;
+ }
+ }
+
+ return;
+ }
+
+ x = V[I[start + len / 2] + h];
+ jj = 0;
+ kk = 0;
+ for (i = start; i < start + len; i++) {
+ if (V[I[i] + h] < x) {
+ jj++;
+ }
+ if (V[I[i] + h] == x) {
+ kk++;
+ }
+ }
+
+ jj += start;
+ kk += jj;
+
+ i = start;
+ j = 0;
+ k = 0;
+ while (i < jj) {
+ if (V[I[i] + h] < x) {
+ i++;
+ } else if (V[I[i] + h] == x) {
+ tmp = I[i];
+ I[i] = I[jj + j];
+ I[jj + j] = tmp;
+ j++;
+ } else {
+ tmp = I[i];
+ I[i] = I[kk + k];
+ I[kk + k] = tmp;
+ k++;
+ }
+
+ }
+
+ while (jj + j < kk) {
+ if (V[I[jj + j] + h] == x) {
+ j++;
+ } else {
+ tmp = I[jj + j];
+ I[jj + j] = I[kk + k];
+ I[kk + k] = tmp;
+ k++;
+ }
+
+ }
+
+ if (jj > start) {
+ split(I, V, start, jj - start, h);
+ }
+
+ for (i = 0; i < kk - jj; i++) {
+ V[I[jj + i]] = kk - 1;
+ }
+
+ if (jj == kk - 1) {
+ I[jj] = -1;
+ }
+
+ if (start + len > kk) {
+ split(I, V, kk, start + len - kk, h);
+ }
+
+ }
+
+ /**
+ * Fast suffix sporting. Larsson and Sadakane's qsufsort algorithm. See
+ * http://www.cs.lth.se/Research/Algorithms/Papers/jesper5.ps
+ *
+ * @param I
+ * @param V
+ * @param oldBuf
+ * @param oldsize
+ */
+ private static void qsufsort(int[] I, int[] V, byte[] oldBuf, int oldsize) {
+
+ // int oldsize = oldBuf.length;
+ int[] buckets = new int[256];
+
+ // No need to do that in Java.
+ // for ( int i = 0; i < 256; i++ ) {
+ // buckets[i] = 0;
+ // }
+
+ for (int i = 0; i < oldsize; i++) {
+ buckets[oldBuf[i] & 0xff]++;
+ }
+
+ for (int i = 1; i < 256; i++) {
+ buckets[i] += buckets[i - 1];
+ }
+
+ for (int i = 255; i > 0; i--) {
+ buckets[i] = buckets[i - 1];
+ }
+
+ buckets[0] = 0;
+
+ for (int i = 0; i < oldsize; i++) {
+ I[++buckets[oldBuf[i] & 0xff]] = i;
+ }
+
+ I[0] = oldsize;
+ for (int i = 0; i < oldsize; i++) {
+ V[i] = buckets[oldBuf[i] & 0xff];
+ }
+ V[oldsize] = 0;
+
+ for (int i = 1; i < 256; i++) {
+ if (buckets[i] == buckets[i - 1] + 1) {
+ I[buckets[i]] = -1;
+ }
+ }
+
+ I[0] = -1;
+
+ for (int h = 1; I[0] != -(oldsize + 1); h += h) {
+ int len = 0;
+ int i;
+ for (i = 0; i < oldsize + 1;) {
+ if (I[i] < 0) {
+ len -= I[i];
+ i -= I[i];
+ } else {
+ // if(len) I[i-len]=-len;
+ if (len != 0) {
+ I[i - len] = -len;
+ }
+ len = V[I[i]] + 1 - i;
+ split(I, V, i, len, h);
+ i += len;
+ len = 0;
+ }
+
+ }
+
+ if (len != 0) {
+ I[i - len] = -len;
+ }
+ }
+
+ for (int i = 0; i < oldsize + 1; i++) {
+ I[V[i]] = i;
+ }
+ }
+
+ /**
+ * Count the number of bytes that match in oldBuf (starting at offset
+ * oldOffset) and newBuf (starting at offset newOffset).
+ *
+ * @param oldBuf
+ * @param oldOffset
+ * @param newBuf
+ * @param newOffset
+ * @return
+ */
+ private final static int matchlen(byte[] oldBuf, int oldSize,
+ int oldOffset, byte[] newBuf, int newSize, int newOffset) {
+ // int end = Math
+ // .min(oldBuf.length - oldOffset, newBuf.length - newOffset);
+ int end = Math.min(oldSize - oldOffset, newSize - newOffset);
+ for (int i = 0; i < end; i++) {
+ if (oldBuf[oldOffset + i] != newBuf[newOffset + i]) {
+ return i;
+ }
+ }
+ return end;
+ }
+
+ private final static int search(int[] I, byte[] oldBuf, int oldSize,
+ byte[] newBuf, int newSize, int newBufOffset, int start, int end,
+ IntByRef pos) {
+
+ if (end - start < 2) {
+ int x = matchlen(oldBuf, oldSize, I[start], newBuf, newSize,
+ newBufOffset);
+ int y = matchlen(oldBuf, oldSize, I[end], newBuf, newSize,
+ newBufOffset);
+
+ if (x > y) {
+ pos.value = I[start];
+ return x;
+ } else {
+ pos.value = I[end];
+ return y;
+ }
+ }
+
+ int x = start + (end - start) / 2;
+ if (Util.memcmp(oldBuf, oldSize, I[x], newBuf, newSize, newBufOffset) < 0) {
+ return search(I, oldBuf, oldSize, newBuf, newSize, newBufOffset, x,
+ end, pos);
+ } else {
+ return search(I, oldBuf, oldSize, newBuf, newSize, newBufOffset,
+ start, x, pos);
+ }
+
+ }
+
+ /**
+ * @param oldFile
+ * @param newFile
+ * @param diffFile
+ * @throws IOException
+ */
+ public static void bsdiff(File oldFile, File newFile, File diffFile)
+ throws IOException {
+ InputStream oldInputStream = new BufferedInputStream(
+ new FileInputStream(oldFile));
+ InputStream newInputStream = new BufferedInputStream(
+ new FileInputStream(newFile));
+ OutputStream diffOutputStream = new FileOutputStream(diffFile);
+
+ byte[] diffBytes = bsdiff(oldInputStream, (int) oldFile.length(),
+ newInputStream, (int) newFile.length());
+
+ diffOutputStream.write(diffBytes);
+ diffOutputStream.close();
+ }
+
+ /**
+ * @param oldInputStream
+ * @param oldsize
+ * @param newInputStream
+ * @param newsize
+ * @return
+ * @throws IOException
+ */
+ public static byte[] bsdiff(InputStream oldInputStream, int oldsize,
+ InputStream newInputStream, int newsize) throws IOException {
+
+ byte[] oldBuf = new byte[oldsize];
+
+ Util.readFromStream(oldInputStream, oldBuf, 0, oldsize);
+ oldInputStream.close();
+
+ byte[] newBuf = new byte[newsize];
+ Util.readFromStream(newInputStream, newBuf, 0, newsize);
+ newInputStream.close();
+
+ return bsdiff(oldBuf, oldsize, newBuf, newsize);
+ }
+
+ /**
+ * @param oldBuf
+ * @param oldsize
+ * @param newBuf
+ * @param newsize
+ * @return
+ * @throws IOException
+ */
+ public static byte[] bsdiff(byte[] oldBuf, int oldsize, byte[] newBuf,
+ int newsize) throws IOException {
+
+ int[] I = new int[oldsize + 1];
+ qsufsort(I, new int[oldsize + 1], oldBuf, oldsize);
+
+ // diff block
+ int dblen = 0;
+ byte[] db = new byte[newsize];
+
+ // extra block
+ int eblen = 0;
+ byte[] eb = new byte[newsize];
+
+ /*
+ * Diff file is composed as follows:
+ *
+ * Header (32 bytes) Data (from offset 32 to end of file)
+ *
+ * Header: Offset 0, length 8 bytes: file magic "jbdiff40" Offset 8,
+ * length 8 bytes: length of compressed ctrl block Offset 16, length 8
+ * bytes: length of compressed diff block Offset 24, length 8 bytes:
+ * length of new file
+ *
+ * Data: 32 (length ctrlBlockLen): ctrlBlock (bzip2) 32+ctrlBlockLen
+ * (length diffBlockLen): diffBlock (bzip2) 32+ctrlBlockLen+diffBlockLen
+ * (to end of file): extraBlock (bzip2)
+ *
+ * ctrlBlock comprises a set of records, each record 12 bytes. A record
+ * comprises 3 x 32 bit integers. The ctrlBlock is not compressed.
+ */
+
+ ByteArrayOutputStream byteOut = new ByteArrayOutputStream();
+ DataOutputStream diffOut = new DataOutputStream(byteOut);
+
+ /*
+ * Write as much of header as we have now. Size of ctrlBlock and
+ * diffBlock must be filled in later.
+ */
+ diffOut.write(MAGIC_BYTES);
+ diffOut.writeLong(-1); // place holder for ctrlBlockLen
+ diffOut.writeLong(-1); // place holder for diffBlockLen
+ diffOut.writeLong(newsize);
+ diffOut.flush();
+
+ CBZip2OutputStream bzip2Out = new CBZip2OutputStream(diffOut);
+ DataOutputStream dataOut = new DataOutputStream(bzip2Out);
+
+ int oldscore, scsc;
+
+ int overlap, Ss, lens;
+ int i;
+ int scan = 0;
+ int len = 0;
+ int lastscan = 0;
+ int lastpos = 0;
+ int lastoffset = 0;
+
+ IntByRef pos = new IntByRef();
+ // int ctrlBlockLen = 0;
+
+ while (scan < newsize) {
+ oldscore = 0;
+
+ for (scsc = scan += len; scan < newsize; scan++) {
+
+ len = search(I, oldBuf, oldsize, newBuf, newsize, scan, 0,
+ oldsize, pos);
+
+ for (; scsc < scan + len; scsc++) {
+ if ((scsc + lastoffset < oldsize)
+ && (oldBuf[scsc + lastoffset] == newBuf[scsc])) {
+ oldscore++;
+ }
+ }
+
+ if (((len == oldscore) && (len != 0)) || (len > oldscore + 8)) {
+ break;
+ }
+
+ if ((scan + lastoffset < oldsize)
+ && (oldBuf[scan + lastoffset] == newBuf[scan])) {
+ oldscore--;
+ }
+ }
+
+ if ((len != oldscore) || (scan == newsize)) {
+ int s = 0;
+ int Sf = 0;
+ int lenf = 0;
+ for (i = 0; (lastscan + i < scan) && (lastpos + i < oldsize);) {
+ if (oldBuf[lastpos + i] == newBuf[lastscan + i])
+ s++;
+ i++;
+ if (s * 2 - i > Sf * 2 - lenf) {
+ Sf = s;
+ lenf = i;
+ }
+ }
+
+ int lenb = 0;
+ if (scan < newsize) {
+ s = 0;
+ int Sb = 0;
+ for (i = 1; (scan >= lastscan + i) && (pos.value >= i); i++) {
+ if (oldBuf[pos.value - i] == newBuf[scan - i])
+ s++;
+ if (s * 2 - i > Sb * 2 - lenb) {
+ Sb = s;
+ lenb = i;
+ }
+ }
+ }
+
+ if (lastscan + lenf > scan - lenb) {
+ overlap = (lastscan + lenf) - (scan - lenb);
+ s = 0;
+ Ss = 0;
+ lens = 0;
+ for (i = 0; i < overlap; i++) {
+ if (newBuf[lastscan + lenf - overlap + i] == oldBuf[lastpos
+ + lenf - overlap + i]) {
+ s++;
+ }
+ if (newBuf[scan - lenb + i] == oldBuf[pos.value - lenb
+ + i]) {
+ s--;
+ }
+ if (s > Ss) {
+ Ss = s;
+ lens = i + 1;
+ }
+ }
+
+ lenf += lens - overlap;
+ lenb -= lens;
+ }
+
+ // ? byte casting introduced here -- might affect things
+ for (i = 0; i < lenf; i++) {
+ db[dblen + i] = (byte) (newBuf[lastscan + i] - oldBuf[lastpos
+ + i]);
+ }
+
+ for (i = 0; i < (scan - lenb) - (lastscan + lenf); i++) {
+ eb[eblen + i] = newBuf[lastscan + lenf + i];
+ }
+
+ dblen += lenf;
+ eblen += (scan - lenb) - (lastscan + lenf);
+
+ /*
+ * Write control block entry (3 x int)
+ */
+ // diffOut.writeInt( lenf );
+ // diffOut.writeInt( ( scan - lenb ) - ( lastscan + lenf ) );
+ // diffOut.writeInt( ( pos[0] - lenb ) - ( lastpos + lenf ) );
+ // ctrlBlockLen += 12;
+ dataOut.writeInt(lenf);
+ dataOut.writeInt((scan - lenb) - (lastscan + lenf));
+ dataOut.writeInt((pos.value - lenb) - (lastpos + lenf));
+
+ lastscan = scan - lenb;
+ lastpos = pos.value - lenb;
+ lastoffset = pos.value - scan;
+ } // end if
+ } // end while loop
+
+ dataOut.flush();
+ bzip2Out.finish();
+
+ // now compressed ctrlBlockLen
+ int ctrlBlockLen = diffOut.size() - Util.HEADER_SIZE;
+ // System.err.println( "Diff: ctrlBlockLen=" + ctrlBlockLen );
+
+ // GZIPOutputStream gzOut;
+
+ /*
+ * Write diff block
+ */
+ // gzOut = new GZIPOutputStream( diffOut );
+ bzip2Out = new CBZip2OutputStream(diffOut);
+ bzip2Out.write(db, 0, dblen);
+ bzip2Out.finish();
+ bzip2Out.flush();
+ int diffBlockLen = diffOut.size() - ctrlBlockLen - Util.HEADER_SIZE;
+ // System.err.println( "Diff: diffBlockLen=" + diffBlockLen );
+
+ /*
+ * Write extra block
+ */
+ // gzOut = new GZIPOutputStream( diffOut );
+ bzip2Out = new CBZip2OutputStream(diffOut);
+ bzip2Out.write(eb, 0, eblen);
+ bzip2Out.finish();
+ bzip2Out.flush();
+ // long extraBlockLen = diffOut.size() - diffBlockLen - ctrlBlockLen -
+ // HEADER_SIZE;
+ // System.err.println( "Diff: extraBlockLen=" + extraBlockLen );
+
+ diffOut.close();
+
+ /*
+ * Write missing header info.
+ */
+ ByteArrayOutputStream byteHeaderOut = new ByteArrayOutputStream(
+ Util.HEADER_SIZE);
+ DataOutputStream headerOut = new DataOutputStream(byteHeaderOut);
+ headerOut.write(MAGIC_BYTES);
+ headerOut.writeLong(ctrlBlockLen); // place holder for ctrlBlockLen
+ headerOut.writeLong(diffBlockLen); // place holder for diffBlockLen
+ headerOut.writeLong(newsize);
+ headerOut.close();
+
+ // Copy header information into the diff
+ byte[] diffBytes = byteOut.toByteArray();
+ byte[] headerBytes = byteHeaderOut.toByteArray();
+
+ System.arraycopy(headerBytes, 0, diffBytes, 0, headerBytes.length);
+
+ return diffBytes;
+ // /*
+ // * Write missing header info. Need to reopen the file with
+ // RandomAccessFile
+ // * for this.
+ // */
+ // RandomAccessFile diff = new RandomAccessFile( diffFile, "rw" );
+ // diff.seek( 8 );
+ // diff.writeLong( ctrlBlockLen ); // ctrlBlockLen (compressed) @offset
+ // 8
+ // diff.writeLong( diffBlockLen ); // diffBlockLen (compressed) @offset
+ // 16
+ // diff.close();
+ }
+
+ /**
+ * Run JBDiff from the command line. Params: oldfile newfile difffile. diff
+ * file will be created.
+ *
+ * @param arg
+ * @throws IOException
+ */
+ public static void main(String[] arg) throws IOException {
+
+ if (arg.length != 3) {
+ System.err
+ .println("usage example: java -Xmx200m ie.wombat.jbdiff.JBDiff oldfile newfile patchfile\n");
+ return;
+ }
+
+ File oldFile = new File(arg[0]);
+ File newFile = new File(arg[1]);
+ File diffFile = new File(arg[2]);
+
+ bsdiff(oldFile, newFile, diffFile);
+
+ }
+
+ private static class IntByRef {
+ private int value;
+ }
+}

Back to the top