Skip to main content
aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYoussef Elghareeb2021-05-12 13:35:45 +0000
committerYoussef Elghareeb2021-05-31 11:48:37 +0000
commit1788b72d1af819dee6f371af9e1b0667f0ed8a64 (patch)
treee3cde80a1ac9a439fdc506ccc610130a71b15c08
parent46a702ef418e9f7723dac10d94460a4f3c0a689a (diff)
downloadjgit-1788b72d1af819dee6f371af9e1b0667f0ed8a64.tar.gz
jgit-1788b72d1af819dee6f371af9e1b0667f0ed8a64.tar.xz
jgit-1788b72d1af819dee6f371af9e1b0667f0ed8a64.zip
Skip detecting content renames for binary files
This is similar to change Idbc2c29bd that skipped detecting content renames for large files. With this change, we added a new option in RenameDetector called "skipContentRenamesForBinaryFiles", that when set, causes binary files with any slight modification to be identified as added/deleted. The default for this boolean is false, so preserving current behaviour. Change-Id: I4770b1f69c60b1037025ddd0940ba86df6047299
-rw-r--r--org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java51
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java28
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java13
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java26
4 files changed, 111 insertions, 7 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java
index 2ea3cd7ebe..5edb60ce37 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java
@@ -580,6 +580,57 @@ public class RenameDetectorTest extends RepositoryTestCase {
}
@Test
+ public void testExactRenameForBinaryFile_isIdentified() throws Exception {
+ ObjectId aId = blob("a\nb\nc\n\0\0\0\0d\n");
+
+ DiffEntry a = DiffEntry.add(PATH_A, aId);
+ DiffEntry b = DiffEntry.delete(PATH_Q, aId);
+
+ rd.add(a);
+ rd.add(b);
+
+ List<DiffEntry> entries = rd.compute();
+ assertEquals(1, entries.size());
+ assertRename(b, a, 100, entries.get(0));
+ }
+
+ @Test
+ public void testInexactRenameForBinaryFile_identifiedByDefault() throws Exception {
+ ObjectId aId = blob("a\nb\nc\n\0\0\0\0d\n");
+ ObjectId bId = blob("a\nb\nc\n\0\0\0d\n");
+
+ DiffEntry a = DiffEntry.add(PATH_A, aId);
+ DiffEntry b = DiffEntry.delete(PATH_Q, bId);
+
+ rd.add(a);
+ rd.add(b);
+ rd.setRenameScore(40);
+
+ List<DiffEntry> entries = rd.compute();
+ assertEquals(1, entries.size());
+ assertRename(b, a, 50, entries.get(0));
+ }
+
+ @Test
+ public void testInexactRenameForBinaryFile_notIdentifiedIfSkipParameterSet() throws Exception {
+ ObjectId aId = blob("a\nb\nc\n\0\0\0\0d\n");
+ ObjectId bId = blob("a\nb\nc\n\0\0\0d\n");
+
+ DiffEntry a = DiffEntry.add(PATH_A, aId);
+ DiffEntry b = DiffEntry.delete(PATH_Q, bId);
+
+ rd.add(a);
+ rd.add(b);
+ rd.setRenameScore(40);
+ rd.setSkipContentRenamesForBinaryFiles(true);
+
+ List<DiffEntry> entries = rd.compute();
+ assertEquals(2, entries.size());
+ assertAdd(PATH_A, aId, FileMode.REGULAR_FILE, entries.get(0));
+ assertDelete(PATH_Q, bId, FileMode.REGULAR_FILE, entries.get(1));
+ }
+
+ @Test
public void testSetRenameScore_IllegalArgs() throws Exception {
try {
rd.setRenameScore(-1);
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java
index 75784c2556..ba1f63b680 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java
@@ -104,6 +104,13 @@ public class RenameDetector {
*/
private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
+ /**
+ * Skip detecting content renames for binary files. Content renames are
+ * those that are not exact, that is with a slight content modification
+ * between the two files.
+ */
+ private boolean skipContentRenamesForBinaryFiles = false;
+
/** Set if the number of adds or deletes was over the limit. */
private boolean overRenameLimit;
@@ -236,6 +243,26 @@ public class RenameDetector {
}
/**
+ * Get skipping detecting content renames for binary files.
+ *
+ * @return true if content renames should be skipped for binary files, false otherwise.
+ * @since 5.12
+ */
+ public boolean getSkipContentRenamesForBinaryFiles() {
+ return skipContentRenamesForBinaryFiles;
+ }
+
+ /**
+ * Sets skipping detecting content renames for binary files.
+ *
+ * @param value true if content renames should be skipped for binary files, false otherwise.
+ * @since 5.12
+ */
+ public void setSkipContentRenamesForBinaryFiles(boolean value) {
+ this.skipContentRenamesForBinaryFiles = value;
+ }
+
+ /**
* Check if the detector is over the rename limit.
* <p>
* This method can be invoked either before or after {@code getEntries} has
@@ -521,6 +548,7 @@ public class RenameDetector {
d = new SimilarityRenameDetector(reader, deleted, added);
d.setRenameScore(getRenameScore());
d.setBigFileThreshold(getBigFileThreshold());
+ d.setSkipBinaryFiles(getSkipContentRenamesForBinaryFiles());
d.compute(pm);
overRenameLimit |= d.isTableOverflow();
deleted = d.getLeftOverSources();
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
index fb6e5df589..661369b86a 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
@@ -102,6 +102,15 @@ public class SimilarityIndex {
idGrowAt = growAt(idHashBits);
}
+ static boolean isBinary(ObjectLoader obj) throws IOException {
+ if (obj.isLarge()) {
+ try (ObjectStream in1 = obj.openStream()) {
+ return RawText.isBinary(in1);
+ }
+ }
+ return RawText.isBinary(obj.getCachedBytes());
+ }
+
void hash(ObjectLoader obj) throws MissingObjectException, IOException,
TableFullException {
if (obj.isLarge()) {
@@ -115,9 +124,7 @@ public class SimilarityIndex {
private void hashLargeObject(ObjectLoader obj) throws IOException,
TableFullException {
boolean text;
- try (ObjectStream in1 = obj.openStream()) {
- text = !RawText.isBinary(in1);
- }
+ text = !isBinary(obj);
try (ObjectStream in2 = obj.openStream()) {
hash(in2, in2.getSize(), text);
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
index 082f31d178..5871b4aeea 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
@@ -26,6 +26,7 @@ import org.eclipse.jgit.errors.CancelledException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.NullProgressMonitor;
+import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ProgressMonitor;
class SimilarityRenameDetector {
@@ -87,6 +88,9 @@ class SimilarityRenameDetector {
*/
private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
+ /** Skip content renames for binary files. */
+ private boolean skipBinaryFiles = false;
+
/** Set if any {@link SimilarityIndex.TableFullException} occurs. */
private boolean tableOverflow;
@@ -107,6 +111,10 @@ class SimilarityRenameDetector {
bigFileThreshold = threshold;
}
+ void setSkipBinaryFiles(boolean value) {
+ skipBinaryFiles = value;
+ }
+
void compute(ProgressMonitor pm) throws IOException, CancelledException {
if (pm == null)
pm = NullProgressMonitor.INSTANCE;
@@ -271,7 +279,12 @@ class SimilarityRenameDetector {
if (s == null) {
try {
- s = hash(OLD, srcEnt);
+ ObjectLoader loader = reader.open(OLD, srcEnt);
+ if (skipBinaryFiles && SimilarityIndex.isBinary(loader)) {
+ pm.update(1);
+ continue SRC;
+ }
+ s = hash(loader);
} catch (TableFullException tableFull) {
tableOverflow = true;
continue SRC;
@@ -280,7 +293,12 @@ class SimilarityRenameDetector {
SimilarityIndex d;
try {
- d = hash(NEW, dstEnt);
+ ObjectLoader loader = reader.open(NEW, dstEnt);
+ if (skipBinaryFiles && SimilarityIndex.isBinary(loader)) {
+ pm.update(1);
+ continue;
+ }
+ d = hash(loader);
} catch (TableFullException tableFull) {
if (dstTooLarge == null)
dstTooLarge = new BitSet(dsts.size());
@@ -364,10 +382,10 @@ class SimilarityRenameDetector {
return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100;
}
- private SimilarityIndex hash(DiffEntry.Side side, DiffEntry ent)
+ private SimilarityIndex hash(ObjectLoader objectLoader)
throws IOException, TableFullException {
SimilarityIndex r = new SimilarityIndex();
- r.hash(reader.open(side, ent));
+ r.hash(objectLoader);
r.sort();
return r;
}

Back to the top