summary | refs | log | tree | commit | diff | stats | about
diff options
context:
space:
mode:
author: Jeff Schumacher, 2010-06-29 19:04:08 (EDT)
committer: Shawn O. Pearce, 2010-06-29 20:23:00 (EDT)
commit: 9f2249bd26199f6b79b72bff8328e18a0935191b (patch)
tree: cef1bdcd51e68578dd8b17b08273098423b56d29
parent: 730b708dae88a79cb3d926fc96ada377516c583c (diff)
download: jgit-9f2249bd26199f6b79b72bff8328e18a0935191b.zip
jgit-9f2249bd26199f6b79b72bff8328e18a0935191b.tar.gz
jgit-9f2249bd26199f6b79b72bff8328e18a0935191b.tar.bz2
Added check for binary files while diffing (refs/changes/01/1001/2)
Added a check in Diff to ensure that files that are most likely not text are not line-by-line diffed. Files are determined to be binary by checking the first 8000 bytes for a null character. This is a similar heuristic to what C Git uses.

Change-Id: I2b6f05674c88d89b3f549a5db483f850f7f46c26
-rw-r--r-- org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java | 23
-rw-r--r-- org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java | 21
2 files changed, 38 insertions, 6 deletions
diff --git a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java
index 931c46d..fc1e400 100644
--- a/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java
+++ b/org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Diff.java
@@ -132,16 +132,28 @@ class Diff extends TextBuiltin {
+ (mode1.equals(mode2) ? " " + mode1 : ""));
out.println("--- " + (isNew ? "/dev/null" : name1));
out.println("+++ " + (isDelete ? "/dev/null" : name2));
- RawText a = getRawText(id1);
- RawText b = getRawText(id2);
+
+ byte[] aRaw = getRawBytes(id1);
+ byte[] bRaw = getRawBytes(id2);
+
+ if (RawText.isBinary(aRaw) || RawText.isBinary(bRaw)) {
+ out.println("Binary files differ");
+ return;
+ }
+
+ RawText a = getRawText(aRaw);
+ RawText b = getRawText(bRaw);
MyersDiff diff = new MyersDiff(a, b);
fmt.formatEdits(out, a, b, diff.getEdits());
}
- private RawText getRawText(ObjectId id) throws IOException {
+ private byte[] getRawBytes(ObjectId id) throws IOException {
if (id.equals(ObjectId.zeroId()))
- return new RawText(new byte[] {});
- byte[] raw = db.openBlob(id).getCachedBytes();
+ return new byte[] {};
+ return db.openBlob(id).getCachedBytes();
+ }
+
+ private RawText getRawText(byte[] raw) {
if (ignoreWsAll)
return new RawTextIgnoreAllWhitespace(raw);
else if (ignoreWsTrailing)
@@ -154,4 +166,3 @@ class Diff extends TextBuiltin {
return new RawText(raw);
}
}
-
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
index c785534..c01cb7a 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java
@@ -65,6 +65,9 @@ import org.eclipse.jgit.util.RawParseUtils;
* they are converting from "line number" to "element index".
*/
public class RawText implements Sequence {
+ /** Number of bytes to check for heuristics in {@link #isBinary(byte[])} */
+ private static final int FIRST_FEW_BYTES = 8000;
+
/** The file content for this sequence. */
protected final byte[] content;
@@ -202,4 +205,22 @@ public class RawText implements Sequence {
hash = (hash << 5) ^ (raw[ptr] & 0xff);
return hash;
}
+
+ /**
+ * Determine heuristically whether a byte array represents binary (as
+ * opposed to text) content.
+ *
+ * @param raw
+ * the raw file content.
+ * @return true if raw is likely to be a binary file, false otherwise
+ */
+ public static boolean isBinary(byte[] raw) {
+ // Same heuristic as C Git
+ int size = raw.length > FIRST_FEW_BYTES ? FIRST_FEW_BYTES : raw.length;
+ for (int ptr = 0; ptr < size; ptr++)
+ if (raw[ptr] == '\0')
+ return true;
+
+ return false;
+ }
}