diff options
Diffstat (limited to 'bundles/ie.wombat.jbdiff/src/ie/wombat/jbdiff/JBDiff.java')
-rw-r--r-- | bundles/ie.wombat.jbdiff/src/ie/wombat/jbdiff/JBDiff.java | 604 |
1 files changed, 604 insertions, 0 deletions
diff --git a/bundles/ie.wombat.jbdiff/src/ie/wombat/jbdiff/JBDiff.java b/bundles/ie.wombat.jbdiff/src/ie/wombat/jbdiff/JBDiff.java new file mode 100644 index 000000000..cee25fe65 --- /dev/null +++ b/bundles/ie.wombat.jbdiff/src/ie/wombat/jbdiff/JBDiff.java @@ -0,0 +1,604 @@ +/* + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +package ie.wombat.jbdiff; + +import java.io.BufferedInputStream; +import java.io.ByteArrayOutputStream; +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import org.apache.tools.bzip2.CBZip2OutputStream; + +/** + * Java Binary Diff utility. Based on bsdiff (v4.2) by Colin Percival (see + * http://www.daemonology.net/bsdiff/ ) and distributed under BSD license. + * + * <p> + * Running this on large files will probably require an increase of the default + * maximum heap size (use java -Xmx200m) + * </p> + * + * @author Joe Desbonnet, joe@galway.net + * + */ +public class JBDiff { + + // JBDiff extensions by Stefan.Liebig@compeople.de: + // + // - uses an extended version of the org.apache.tools.bzip2 compressor to + // compress all of the blocks (ctrl,diff,extra). + // - added interfaces that allows using of JBDiff with streams and byte + // arrays. + + private static final String VERSION = "jbdiff-0.1.0"; + + // This is ´jbdiff40´. + private static final byte[] MAGIC_BYTES = new byte[] { 0x6a, 0x62, 0x64, + 0x69, 0x66, 0x66, 0x34, 0x30 }; + + private final static void split(int[] I, int[] V, int start, int len, int h) { + + int i, j, k, x, tmp, jj, kk; + + if (len < 16) { + for (k = start; k < start + len; k += j) { + j = 1; + x = V[I[k] + h]; + for (i = 1; k + i < start + len; i++) { + if (V[I[k + i] + h] < x) { + x = V[I[k + i] + h]; + j = 0; + } + + if (V[I[k + i] + h] == x) { + tmp = I[k + j]; + I[k + j] = I[k + i]; + I[k + i] = tmp; + j++; + } + + } + + for (i = 0; i < j; i++) { + V[I[k + i]] = k + j - 1; + } + if (j == 1) { + I[k] = -1; + } + } + + return; + } + + x = V[I[start + len / 2] + h]; + jj = 0; + kk = 0; + for (i = start; i < start + len; i++) { + if (V[I[i] + h] < x) { + jj++; + } + if (V[I[i] + h] == x) { + kk++; + } + } + + jj += start; + kk += jj; + + i = start; + j = 0; + k = 0; + while (i < jj) { + if (V[I[i] + h] < x) { + i++; + } else if (V[I[i] + h] == x) { + tmp = I[i]; + I[i] = I[jj + j]; + I[jj + j] = tmp; + j++; + } else { + tmp = I[i]; + I[i] = I[kk + k]; + I[kk + k] = tmp; + k++; + } + + } + + while (jj + j < kk) { + if (V[I[jj + j] + h] == x) { + j++; + } else { + tmp = I[jj + j]; + I[jj + j] = I[kk + k]; + I[kk + k] = tmp; + k++; + } + + } + + if (jj > start) { + split(I, V, start, jj - start, h); + } + + for (i = 0; i < kk - jj; i++) { + V[I[jj + i]] = kk - 1; + } + + if (jj == kk - 1) { + I[jj] = -1; + } + + if (start + len > kk) { + split(I, V, kk, start + len - kk, h); + } + + } + + /** + * Fast suffix sporting. Larsson and Sadakane's qsufsort algorithm. See + * http://www.cs.lth.se/Research/Algorithms/Papers/jesper5.ps + * + * @param I + * @param V + * @param oldBuf + * @param oldsize + */ + private static void qsufsort(int[] I, int[] V, byte[] oldBuf, int oldsize) { + + // int oldsize = oldBuf.length; + int[] buckets = new int[256]; + + // No need to do that in Java. + // for ( int i = 0; i < 256; i++ ) { + // buckets[i] = 0; + // } + + for (int i = 0; i < oldsize; i++) { + buckets[oldBuf[i] & 0xff]++; + } + + for (int i = 1; i < 256; i++) { + buckets[i] += buckets[i - 1]; + } + + for (int i = 255; i > 0; i--) { + buckets[i] = buckets[i - 1]; + } + + buckets[0] = 0; + + for (int i = 0; i < oldsize; i++) { + I[++buckets[oldBuf[i] & 0xff]] = i; + } + + I[0] = oldsize; + for (int i = 0; i < oldsize; i++) { + V[i] = buckets[oldBuf[i] & 0xff]; + } + V[oldsize] = 0; + + for (int i = 1; i < 256; i++) { + if (buckets[i] == buckets[i - 1] + 1) { + I[buckets[i]] = -1; + } + } + + I[0] = -1; + + for (int h = 1; I[0] != -(oldsize + 1); h += h) { + int len = 0; + int i; + for (i = 0; i < oldsize + 1;) { + if (I[i] < 0) { + len -= I[i]; + i -= I[i]; + } else { + // if(len) I[i-len]=-len; + if (len != 0) { + I[i - len] = -len; + } + len = V[I[i]] + 1 - i; + split(I, V, i, len, h); + i += len; + len = 0; + } + + } + + if (len != 0) { + I[i - len] = -len; + } + } + + for (int i = 0; i < oldsize + 1; i++) { + I[V[i]] = i; + } + } + + /** + * Count the number of bytes that match in oldBuf (starting at offset + * oldOffset) and newBuf (starting at offset newOffset). + * + * @param oldBuf + * @param oldOffset + * @param newBuf + * @param newOffset + * @return + */ + private final static int matchlen(byte[] oldBuf, int oldSize, + int oldOffset, byte[] newBuf, int newSize, int newOffset) { + // int end = Math + // .min(oldBuf.length - oldOffset, newBuf.length - newOffset); + int end = Math.min(oldSize - oldOffset, newSize - newOffset); + for (int i = 0; i < end; i++) { + if (oldBuf[oldOffset + i] != newBuf[newOffset + i]) { + return i; + } + } + return end; + } + + private final static int search(int[] I, byte[] oldBuf, int oldSize, + byte[] newBuf, int newSize, int newBufOffset, int start, int end, + IntByRef pos) { + + if (end - start < 2) { + int x = matchlen(oldBuf, oldSize, I[start], newBuf, newSize, + newBufOffset); + int y = matchlen(oldBuf, oldSize, I[end], newBuf, newSize, + newBufOffset); + + if (x > y) { + pos.value = I[start]; + return x; + } else { + pos.value = I[end]; + return y; + } + } + + int x = start + (end - start) / 2; + if (Util.memcmp(oldBuf, oldSize, I[x], newBuf, newSize, newBufOffset) < 0) { + return search(I, oldBuf, oldSize, newBuf, newSize, newBufOffset, x, + end, pos); + } else { + return search(I, oldBuf, oldSize, newBuf, newSize, newBufOffset, + start, x, pos); + } + + } + + /** + * @param oldFile + * @param newFile + * @param diffFile + * @throws IOException + */ + public static void bsdiff(File oldFile, File newFile, File diffFile) + throws IOException { + InputStream oldInputStream = new BufferedInputStream( + new FileInputStream(oldFile)); + InputStream newInputStream = new BufferedInputStream( + new FileInputStream(newFile)); + OutputStream diffOutputStream = new FileOutputStream(diffFile); + + byte[] diffBytes = bsdiff(oldInputStream, (int) oldFile.length(), + newInputStream, (int) newFile.length()); + + diffOutputStream.write(diffBytes); + diffOutputStream.close(); + } + + /** + * @param oldInputStream + * @param oldsize + * @param newInputStream + * @param newsize + * @return + * @throws IOException + */ + public static byte[] bsdiff(InputStream oldInputStream, int oldsize, + InputStream newInputStream, int newsize) throws IOException { + + byte[] oldBuf = new byte[oldsize]; + + Util.readFromStream(oldInputStream, oldBuf, 0, oldsize); + oldInputStream.close(); + + byte[] newBuf = new byte[newsize]; + Util.readFromStream(newInputStream, newBuf, 0, newsize); + newInputStream.close(); + + return bsdiff(oldBuf, oldsize, newBuf, newsize); + } + + /** + * @param oldBuf + * @param oldsize + * @param newBuf + * @param newsize + * @return + * @throws IOException + */ + public static byte[] bsdiff(byte[] oldBuf, int oldsize, byte[] newBuf, + int newsize) throws IOException { + + int[] I = new int[oldsize + 1]; + qsufsort(I, new int[oldsize + 1], oldBuf, oldsize); + + // diff block + int dblen = 0; + byte[] db = new byte[newsize]; + + // extra block + int eblen = 0; + byte[] eb = new byte[newsize]; + + /* + * Diff file is composed as follows: + * + * Header (32 bytes) Data (from offset 32 to end of file) + * + * Header: Offset 0, length 8 bytes: file magic "jbdiff40" Offset 8, + * length 8 bytes: length of compressed ctrl block Offset 16, length 8 + * bytes: length of compressed diff block Offset 24, length 8 bytes: + * length of new file + * + * Data: 32 (length ctrlBlockLen): ctrlBlock (bzip2) 32+ctrlBlockLen + * (length diffBlockLen): diffBlock (bzip2) 32+ctrlBlockLen+diffBlockLen + * (to end of file): extraBlock (bzip2) + * + * ctrlBlock comprises a set of records, each record 12 bytes. A record + * comprises 3 x 32 bit integers. The ctrlBlock is not compressed. + */ + + ByteArrayOutputStream byteOut = new ByteArrayOutputStream(); + DataOutputStream diffOut = new DataOutputStream(byteOut); + + /* + * Write as much of header as we have now. Size of ctrlBlock and + * diffBlock must be filled in later. + */ + diffOut.write(MAGIC_BYTES); + diffOut.writeLong(-1); // place holder for ctrlBlockLen + diffOut.writeLong(-1); // place holder for diffBlockLen + diffOut.writeLong(newsize); + diffOut.flush(); + + CBZip2OutputStream bzip2Out = new CBZip2OutputStream(diffOut); + DataOutputStream dataOut = new DataOutputStream(bzip2Out); + + int oldscore, scsc; + + int overlap, Ss, lens; + int i; + int scan = 0; + int len = 0; + int lastscan = 0; + int lastpos = 0; + int lastoffset = 0; + + IntByRef pos = new IntByRef(); + // int ctrlBlockLen = 0; + + while (scan < newsize) { + oldscore = 0; + + for (scsc = scan += len; scan < newsize; scan++) { + + len = search(I, oldBuf, oldsize, newBuf, newsize, scan, 0, + oldsize, pos); + + for (; scsc < scan + len; scsc++) { + if ((scsc + lastoffset < oldsize) + && (oldBuf[scsc + lastoffset] == newBuf[scsc])) { + oldscore++; + } + } + + if (((len == oldscore) && (len != 0)) || (len > oldscore + 8)) { + break; + } + + if ((scan + lastoffset < oldsize) + && (oldBuf[scan + lastoffset] == newBuf[scan])) { + oldscore--; + } + } + + if ((len != oldscore) || (scan == newsize)) { + int s = 0; + int Sf = 0; + int lenf = 0; + for (i = 0; (lastscan + i < scan) && (lastpos + i < oldsize);) { + if (oldBuf[lastpos + i] == newBuf[lastscan + i]) + s++; + i++; + if (s * 2 - i > Sf * 2 - lenf) { + Sf = s; + lenf = i; + } + } + + int lenb = 0; + if (scan < newsize) { + s = 0; + int Sb = 0; + for (i = 1; (scan >= lastscan + i) && (pos.value >= i); i++) { + if (oldBuf[pos.value - i] == newBuf[scan - i]) + s++; + if (s * 2 - i > Sb * 2 - lenb) { + Sb = s; + lenb = i; + } + } + } + + if (lastscan + lenf > scan - lenb) { + overlap = (lastscan + lenf) - (scan - lenb); + s = 0; + Ss = 0; + lens = 0; + for (i = 0; i < overlap; i++) { + if (newBuf[lastscan + lenf - overlap + i] == oldBuf[lastpos + + lenf - overlap + i]) { + s++; + } + if (newBuf[scan - lenb + i] == oldBuf[pos.value - lenb + + i]) { + s--; + } + if (s > Ss) { + Ss = s; + lens = i + 1; + } + } + + lenf += lens - overlap; + lenb -= lens; + } + + // ? byte casting introduced here -- might affect things + for (i = 0; i < lenf; i++) { + db[dblen + i] = (byte) (newBuf[lastscan + i] - oldBuf[lastpos + + i]); + } + + for (i = 0; i < (scan - lenb) - (lastscan + lenf); i++) { + eb[eblen + i] = newBuf[lastscan + lenf + i]; + } + + dblen += lenf; + eblen += (scan - lenb) - (lastscan + lenf); + + /* + * Write control block entry (3 x int) + */ + // diffOut.writeInt( lenf ); + // diffOut.writeInt( ( scan - lenb ) - ( lastscan + lenf ) ); + // diffOut.writeInt( ( pos[0] - lenb ) - ( lastpos + lenf ) ); + // ctrlBlockLen += 12; + dataOut.writeInt(lenf); + dataOut.writeInt((scan - lenb) - (lastscan + lenf)); + dataOut.writeInt((pos.value - lenb) - (lastpos + lenf)); + + lastscan = scan - lenb; + lastpos = pos.value - lenb; + lastoffset = pos.value - scan; + } // end if + } // end while loop + + dataOut.flush(); + bzip2Out.finish(); + + // now compressed ctrlBlockLen + int ctrlBlockLen = diffOut.size() - Util.HEADER_SIZE; + // System.err.println( "Diff: ctrlBlockLen=" + ctrlBlockLen ); + + // GZIPOutputStream gzOut; + + /* + * Write diff block + */ + // gzOut = new GZIPOutputStream( diffOut ); + bzip2Out = new CBZip2OutputStream(diffOut); + bzip2Out.write(db, 0, dblen); + bzip2Out.finish(); + bzip2Out.flush(); + int diffBlockLen = diffOut.size() - ctrlBlockLen - Util.HEADER_SIZE; + // System.err.println( "Diff: diffBlockLen=" + diffBlockLen ); + + /* + * Write extra block + */ + // gzOut = new GZIPOutputStream( diffOut ); + bzip2Out = new CBZip2OutputStream(diffOut); + bzip2Out.write(eb, 0, eblen); + bzip2Out.finish(); + bzip2Out.flush(); + // long extraBlockLen = diffOut.size() - diffBlockLen - ctrlBlockLen - + // HEADER_SIZE; + // System.err.println( "Diff: extraBlockLen=" + extraBlockLen ); + + diffOut.close(); + + /* + * Write missing header info. + */ + ByteArrayOutputStream byteHeaderOut = new ByteArrayOutputStream( + Util.HEADER_SIZE); + DataOutputStream headerOut = new DataOutputStream(byteHeaderOut); + headerOut.write(MAGIC_BYTES); + headerOut.writeLong(ctrlBlockLen); // place holder for ctrlBlockLen + headerOut.writeLong(diffBlockLen); // place holder for diffBlockLen + headerOut.writeLong(newsize); + headerOut.close(); + + // Copy header information into the diff + byte[] diffBytes = byteOut.toByteArray(); + byte[] headerBytes = byteHeaderOut.toByteArray(); + + System.arraycopy(headerBytes, 0, diffBytes, 0, headerBytes.length); + + return diffBytes; + // /* + // * Write missing header info. Need to reopen the file with + // RandomAccessFile + // * for this. + // */ + // RandomAccessFile diff = new RandomAccessFile( diffFile, "rw" ); + // diff.seek( 8 ); + // diff.writeLong( ctrlBlockLen ); // ctrlBlockLen (compressed) @offset + // 8 + // diff.writeLong( diffBlockLen ); // diffBlockLen (compressed) @offset + // 16 + // diff.close(); + } + + /** + * Run JBDiff from the command line. Params: oldfile newfile difffile. diff + * file will be created. + * + * @param arg + * @throws IOException + */ + public static void main(String[] arg) throws IOException { + + if (arg.length != 3) { + System.err + .println("usage example: java -Xmx200m ie.wombat.jbdiff.JBDiff oldfile newfile patchfile\n"); + return; + } + + File oldFile = new File(arg[0]); + File newFile = new File(arg[1]); + File diffFile = new File(arg[2]); + + bsdiff(oldFile, newFile, diffFile); + + } + + private static class IntByRef { + private int value; + } +} |