diff options
| author | Jeff Schumacher | 2010-06-29 23:04:08 +0000 |
|---|---|---|
| committer | Shawn O. Pearce | 2010-06-30 00:23:00 +0000 |
| commit | 9f2249bd26199f6b79b72bff8328e18a0935191b (patch) | |
| tree | cef1bdcd51e68578dd8b17b08273098423b56d29 | |
| parent | 730b708dae88a79cb3d926fc96ada377516c583c (diff) | |
| download | jgit-9f2249bd26199f6b79b72bff8328e18a0935191b.tar.gz jgit-9f2249bd26199f6b79b72bff8328e18a0935191b.tar.xz jgit-9f2249bd26199f6b79b72bff8328e18a0935191b.zip | |
Added check for binary files while diffing
Added a check in Diff to ensure that files that are most likely
not text are not diffed line by line. A file is considered binary
if a NUL byte appears within its first 8000 bytes. This is similar
to the heuristic that C Git uses.
Change-Id: I2b6f05674c88d89b3f549a5db483f850f7f46c26
| -rw-r--r-- | org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java | 23 | ||||
| -rw-r--r-- | org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java | 21 |
2 files changed, 38 insertions, 6 deletions
diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java index 931c46d333..fc1e400ab0 100644 --- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java +++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java @@ -132,16 +132,28 @@ class Diff extends TextBuiltin { + (mode1.equals(mode2) ? " " + mode1 : "")); out.println("--- " + (isNew ? "/dev/null" : name1)); out.println("+++ " + (isDelete ? "/dev/null" : name2)); - RawText a = getRawText(id1); - RawText b = getRawText(id2); + + byte[] aRaw = getRawBytes(id1); + byte[] bRaw = getRawBytes(id2); + + if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) { + out.println("Binary files differ"); + return; + } + + RawText a = getRawText(aRaw); + RawText b = getRawText(bRaw); MyersDiff diff = new MyersDiff(a, b); fmt.formatEdits(out, a, b, diff.getEdits()); } - private RawText getRawText(ObjectId id) throws IOException { + private byte[] getRawBytes(ObjectId id) throws IOException { if (id.equals(ObjectId.zeroId())) - return new RawText(new byte[] {}); - byte[] raw = db.openBlob(id).getCachedBytes(); + return new byte[] {}; + return db.openBlob(id).getCachedBytes(); + } + + private RawText getRawText(byte[] raw) { if (ignoreWsAll) return new RawTextIgnoreAllWhitespace(raw); else if (ignoreWsTrailing) @@ -154,4 +166,3 @@ class Diff extends TextBuiltin { return new RawText(raw); } } - diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java index c785534fbb..c01cb7ad8e 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java @@ -65,6 +65,9 @@ import org.eclipse.jgit.util.RawParseUtils; * they are converting from "line number" to "element index". 
*/ public class RawText implements Sequence { + /** Number of bytes to check for heuristics in {@link #isBinary(byte[])} */ + private static final int FIRST_FEW_BYTES = 8000; + /** The file content for this sequence. */ protected final byte[] content; @@ -202,4 +205,22 @@ public class RawText implements Sequence { hash = (hash << 5) ^ (raw[ptr] & 0xff); return hash; } + + /** + * Determine heuristically whether a byte array represents binary (as + * opposed to text) content. + * + * @param raw + * the raw file content. + * @return true if raw is likely to be a binary file, false otherwise + */ + public static boolean isBinary(byte[] raw) { + // Same heuristic as C Git + int size = raw.length > FIRST_FEW_BYTES ? FIRST_FEW_BYTES : raw.length; + for (int ptr = 0; ptr < size; ptr++) + if (raw[ptr] == '\0') + return true; + + return false; + } } |
