summaryrefslogtreecommitdiffstatsabout
diff options
context:
space:
mode:
author Jeff Schumacher 2010-07-09 14:18:50 (EDT)
committer Jeff Schumacher 2010-07-12 15:24:42 (EDT)
commit 64b9458640bafadc70028c99bb038480309e568d (patch)
tree 5b26e567efc9da1162d34a609dddd06a7c38d263
parent a8b29afd82795b3d98b42bf214fea27ab61984cc (diff)
download jgit-64b9458640bafadc70028c99bb038480309e568d.zip
jgit-64b9458640bafadc70028c99bb038480309e568d.tar.gz
jgit-64b9458640bafadc70028c99bb038480309e568d.tar.bz2
Added file size based rename detection optimization
refs/changes/91/1091/2
Prior to this change, pairs of files that were very different in size (so different that they could not have enough content in common to be detected as a rename) still had their similarity scores calculated. This change adds an optimization that skips such pairs. For example, if the rename detection threshold is 60%, the larger file is 200 KB, and the smaller file is 50 KB, the pair cannot be counted as a rename, because the two files cannot possibly share 60% of their content: 60% of the larger file is 200 * 0.6 = 120 KB, and 120 KB is more than the entire 50 KB of the smaller file.

Change-Id: Icd8315412d5de6292839778e7cea7fe6f061b0fc
-rw-r--r-- org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java 32
1 files changed, 32 insertions, 0 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
index c92b1e3..a343fc0 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
@@ -205,6 +205,14 @@ class SimilarityRenameDetector {
//
matrix = new long[srcs.size() * dsts.size()];
+ long[] srcSizes = new long[srcs.size()];
+ long[] dstSizes = new long[dsts.size()];
+
+ // Init the size arrays to some value that indicates that we haven't
+ // calculated the size yet. Since sizes cannot be negative, -1 will work
+ Arrays.fill(srcSizes, -1);
+ Arrays.fill(dstSizes, -1);
+
// Consider each pair of files, if the score is above the minimum
// threshold we need record that scoring in the matrix so we can
// later find the best matches.
@@ -231,6 +239,26 @@ class SimilarityRenameDetector {
continue;
}
+ long srcSize = srcSizes[srcIdx];
+ if (srcSize < 0) {
+ srcSize = size(srcEnt.oldId.toObjectId());
+ srcSizes[srcIdx] = srcSize;
+ }
+
+ long dstSize = dstSizes[dstIdx];
+ if (dstSize < 0) {
+ dstSize = size(dstEnt.newId.toObjectId());
+ dstSizes[dstIdx] = dstSize;
+ }
+
+ long max = Math.max(srcSize, dstSize);
+ long min = Math.min(srcSize, dstSize);
+ if (min * 100 / max < renameScore) {
+ // Cannot possibly match, as the file sizes are so different
+ pm.update(1);
+ continue;
+ }
+
SimilarityIndex d = hash(dstEnt.newId.toObjectId());
int score = s.score(d);
@@ -259,6 +287,10 @@ class SimilarityRenameDetector {
return r;
}
+ private long size(ObjectId objectId) throws IOException {
+ return repo.openObject(objectId).getSize();
+ }
+
private static int score(long value) {
return (int) (value >>> SCORE_SHIFT);
}