summaryrefslogtreecommitdiffstatsabout
diff options
context:
space:
mode:
authorJeff Schumacher2010-07-13 15:21:12 (EDT)
committer Jeff Schumacher2010-07-16 12:56:42 (EDT)
commit31311cacfd311f9a78bdf012a9f6ba1907b9964a (patch)
tree2f68c9b3af58ca0ffd1d96c05d49d8a64ebb2275
parentf666cc755b35a8f57a7c208c88230f7ab7c1d07b (diff)
downloadjgit-31311cacfd311f9a78bdf012a9f6ba1907b9964a.zip
jgit-31311cacfd311f9a78bdf012a9f6ba1907b9964a.tar.gz
jgit-31311cacfd311f9a78bdf012a9f6ba1907b9964a.tar.bz2
Implemented file path based tie breaking to exact rename detectionrefs/changes/30/1130/2
During the exact rename detection phase in RenameDetector, ties were resolved on a first-found basis. I added support for file path based tie breaking during that phase. Basically, there are four situations that have to be handled: One add matching one delete: In this simple case, we pair them as a rename. One add matching many deletes: Find the delete whos path matches the add the closest, and pair them as a rename. Many adds matching one delete: Similar to the above case, we find the add that matches the delete the closest, and pair them as a rename. The other adds are marked as copies of the delete. Many adds matching many deletes: Build a scoring matrix similar to the one used for content- based matching, scoring instead by file path. Some of the utility functions in SimilarityRenameDetector are used in this case, as we use the same encoding scheme. Once the matrix is built, scan it for the best matches, marking them as renames. The rest are marked as copies. I don't particularly like the idea of using utility functions right out of SimilarityRenameDetector, but it works for the moment. A later commit will likely refactor this into a common utility class, as well as bringing exact rename detection out of RenameDetector and into a separate class, much like SimilarityRenameDetector. Change-Id: I1fb08390aebdcbf20d049aecf402a36506e55611
-rw-r--r--org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java58
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java27
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java211
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java8
4 files changed, 257 insertions, 47 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java
index 86b40e2..c32c786 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java
@@ -146,6 +146,48 @@ public class RenameDetectorTest extends RepositoryTestCase {
assertRename(a, d, 100, entries.get(2));
}
+ public void testExactRename_PathBreaksTie() throws Exception {
+ ObjectId foo = blob("foo");
+
+ DiffEntry a = DiffEntry.add("src/com/foo/a.java", foo);
+ DiffEntry b = DiffEntry.delete("src/com/foo/b.java", foo);
+
+ DiffEntry c = DiffEntry.add("c.txt", foo);
+ DiffEntry d = DiffEntry.delete("d.txt", foo);
+
+ // Add out of order to avoid first-match succeeding
+ rd.add(a);
+ rd.add(d);
+ rd.add(b);
+ rd.add(c);
+
+ List<DiffEntry> entries = rd.compute();
+ assertEquals(2, entries.size());
+ assertRename(d, c, 100, entries.get(0));
+ assertRename(b, a, 100, entries.get(1));
+ }
+
+ public void testExactRename_OneDeleteManyAdds() throws Exception {
+ ObjectId foo = blob("foo");
+
+ DiffEntry a = DiffEntry.add("src/com/foo/a.java", foo);
+ DiffEntry b = DiffEntry.add("src/com/foo/b.java", foo);
+ DiffEntry c = DiffEntry.add("c.txt", foo);
+
+ DiffEntry d = DiffEntry.delete("d.txt", foo);
+
+ rd.add(a);
+ rd.add(b);
+ rd.add(c);
+ rd.add(d);
+
+ List<DiffEntry> entries = rd.compute();
+ assertEquals(3, entries.size());
+ assertRename(d, c, 100, entries.get(0));
+ assertCopy(d, a, 100, entries.get(1));
+ assertCopy(d, b, 100, entries.get(2));
+ }
+
public void testInexactRename_OnePair() throws Exception {
ObjectId aId = blob("foo\nbar\nbaz\nblarg\n");
ObjectId bId = blob("foo\nbar\nbaz\nblah\n");
@@ -385,4 +427,20 @@ public class RenameDetectorTest extends RepositoryTestCase {
assertEquals(score, rename.getScore());
}
+
+ private static void assertCopy(DiffEntry o, DiffEntry n, int score,
+ DiffEntry copy) {
+ assertEquals(ChangeType.COPY, copy.getChangeType());
+
+ assertEquals(o.getOldName(), copy.getOldName());
+ assertEquals(n.getNewName(), copy.getNewName());
+
+ assertEquals(o.getOldMode(), copy.getOldMode());
+ assertEquals(n.getNewMode(), copy.getNewMode());
+
+ assertEquals(o.getOldId(), copy.getOldId());
+ assertEquals(n.getNewId(), copy.getNewId());
+
+ assertEquals(score, copy.getScore());
+ }
}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java
index 4d25dbd..304e4cf 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/DiffEntry.java
@@ -315,4 +315,31 @@ public class DiffEntry {
public AbbreviatedObjectId getNewId() {
return newId;
}
+
+ @Override
+ public String toString() {
+ StringBuilder buf = new StringBuilder();
+ buf.append("DiffEntry[");
+ buf.append(changeType);
+ buf.append(" ");
+ switch (changeType) {
+ case ADD:
+ buf.append(newName);
+ break;
+ case COPY:
+ buf.append(oldName + "->" + newName);
+ break;
+ case DELETE:
+ buf.append(oldName);
+ break;
+ case MODIFY:
+ buf.append(oldName);
+ break;
+ case RENAME:
+ buf.append(oldName + "->" + newName);
+ break;
+ }
+ buf.append("]");
+ return buf.toString();
+ }
} \ No newline at end of file
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java
index 5bb63c4..bc23940 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java
@@ -45,6 +45,7 @@ package org.eclipse.jgit.diff;
import java.io.IOException;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
@@ -312,70 +313,131 @@ public class RenameDetector {
return;
pm.beginTask(JGitText.get().renamesFindingExact, //
- added.size() + deleted.size());
+ added.size() + added.size() + deleted.size()
+ + added.size() * deleted.size());
- HashMap<AbbreviatedObjectId, Object> map = new HashMap<AbbreviatedObjectId, Object>();
- for (DiffEntry del : deleted) {
- Object old = map.put(del.oldId, del);
- if (old instanceof DiffEntry) {
- ArrayList<DiffEntry> list = new ArrayList<DiffEntry>(2);
- list.add((DiffEntry) old);
- list.add(del);
- map.put(del.oldId, list);
+ HashMap<AbbreviatedObjectId, Object> deletedMap = populateMap(deleted, pm);
+ HashMap<AbbreviatedObjectId, Object> addedMap = populateMap(added, pm);
- } else if (old != null) {
- // Must be a list of DiffEntries
- ((List) old).add(del);
- map.put(del.oldId, old);
- }
- pm.update(1);
+ ArrayList<DiffEntry> uniqueAdds = new ArrayList<DiffEntry>(added.size());
+ ArrayList<List<DiffEntry>> nonUniqueAdds = new ArrayList<List<DiffEntry>>();
+
+ for (Object o : addedMap.values()) {
+ if (o instanceof DiffEntry)
+ uniqueAdds.add((DiffEntry) o);
+ else
+ nonUniqueAdds.add((List<DiffEntry>) o);
}
ArrayList<DiffEntry> left = new ArrayList<DiffEntry>(added.size());
- for (DiffEntry dst : added) {
- Object del = map.get(dst.newId);
+
+ for (DiffEntry a : uniqueAdds) {
+ Object del = deletedMap.get(a.newId);
if (del instanceof DiffEntry) {
+ // We have one add to one delete: pair them if they are the same
+ // type
DiffEntry e = (DiffEntry) del;
- if (sameType(e.oldMode, dst.newMode)) {
- if (e.changeType == ChangeType.DELETE) {
- e.changeType = ChangeType.RENAME;
- entries.add(exactRename(e, dst));
- } else {
- entries.add(exactCopy(e, dst));
- }
+ if (sameType(e.oldMode, a.newMode)) {
+ e.changeType = ChangeType.RENAME;
+ entries.add(exactRename(e, a));
} else {
- left.add(dst);
+ left.add(a);
}
-
} else if (del != null) {
+ // We have one add to many deletes: find the delete with the
+ // same type and closest name to the add, then pair them
List<DiffEntry> list = (List<DiffEntry>) del;
- DiffEntry best = null;
- for (DiffEntry e : list) {
- if (sameType(e.oldMode, dst.newMode)) {
- best = e;
- break;
- }
+ DiffEntry best = bestPathMatch(a, list);
+ if (best != null) {
+ best.changeType = ChangeType.RENAME;
+ entries.add(exactRename(best, a));
+ } else {
+ left.add(a);
}
+ } else {
+ left.add(a);
+ }
+ pm.update(1);
+ }
+
+ for (List<DiffEntry> adds : nonUniqueAdds) {
+ Object o = deletedMap.get(adds.get(0).newId);
+ if (o instanceof DiffEntry) {
+ // We have many adds to one delete: find the add with the same
+ // type and closest name to the delete, then pair them. Mark the
+ // rest as copies of the delete.
+ DiffEntry d = (DiffEntry) o;
+ DiffEntry best = bestPathMatch(d, adds);
if (best != null) {
- if (best.changeType == ChangeType.DELETE) {
- best.changeType = ChangeType.RENAME;
- entries.add(exactRename(best, dst));
- } else {
- entries.add(exactCopy(best, dst));
+ d.changeType = ChangeType.RENAME;
+ entries.add(exactRename(d, best));
+ for (DiffEntry a : adds) {
+ if (a != best) {
+ if (sameType(d.oldMode, a.newMode)) {
+ entries.add(exactCopy(d, a));
+ } else {
+ left.add(a);
+ }
+ }
}
} else {
- left.add(dst);
+ left.addAll(adds);
}
-
} else {
- left.add(dst);
+ // We have many adds to many deletes: score all the adds against
+ // all the deletes by path name, take the best matches, pair
+ // them as renames, then call the rest copies
+ List<DiffEntry> dels = (List<DiffEntry>) o;
+ long[] matrix = new long[dels.size() * adds.size()];
+ int mNext = 0;
+ for (int addIdx = 0; addIdx < adds.size(); addIdx++) {
+ String addedName = adds.get(addIdx).newName;
+
+ for (int delIdx = 0; delIdx < dels.size(); delIdx++) {
+ String deletedName = dels.get(delIdx).oldName;
+
+ int score = SimilarityRenameDetector.nameScore(addedName, deletedName);
+ matrix[mNext] = SimilarityRenameDetector.encode(score, addIdx, delIdx);
+ mNext++;
+ }
+ }
+
+ Arrays.sort(matrix);
+
+ for (--mNext; mNext >= 0; mNext--) {
+ long ent = matrix[mNext];
+ int delIdx = SimilarityRenameDetector.srcFile(ent);
+ int addIdx = SimilarityRenameDetector.dstFile(ent);
+ DiffEntry d = dels.get(delIdx);
+ DiffEntry a = adds.get(addIdx);
+
+ if (a == null) {
+ pm.update(1);
+ continue; // was already matched earlier
+ }
+
+ ChangeType type;
+ if (d.changeType == ChangeType.DELETE) {
+ // First use of this source file. Tag it as a rename so we
+ // later know it is already been used as a rename, other
+ // matches (if any) will claim themselves as copies instead.
+ //
+ d.changeType = ChangeType.RENAME;
+ type = ChangeType.RENAME;
+ } else {
+ type = ChangeType.COPY;
+ }
+
+ entries.add(DiffEntry.pair(type, d, a, 100));
+ adds.set(addIdx, null); // Claim the destination was matched.
+ pm.update(1);
+ }
}
- pm.update(1);
}
added = left;
- deleted = new ArrayList<DiffEntry>(map.size());
- for (Object o : map.values()) {
+ deleted = new ArrayList<DiffEntry>(deletedMap.size());
+ for (Object o : deletedMap.values()) {
if (o instanceof DiffEntry) {
DiffEntry e = (DiffEntry) o;
if (e.changeType == ChangeType.DELETE)
@@ -391,6 +453,69 @@ public class RenameDetector {
pm.endTask();
}
+ /**
+ * Find the best match by file path for a given DiffEntry from a list of
+ * DiffEntrys. The returned DiffEntry will be of the same type as <src>. If
+ * no DiffEntry can be found that has the same type, this method will return
+ * null.
+ *
+ * @param src
+ * the DiffEntry to try to find a match for
+ * @param list
+ * a list of DiffEntrys to search through
+ * @return the DiffEntry from <list> who's file path best matches <src>
+ */
+ private static DiffEntry bestPathMatch(DiffEntry src, List<DiffEntry> list) {
+ DiffEntry best = null;
+ int score = -1;
+
+ for (DiffEntry d : list) {
+ if (sameType(mode(d), mode(src))) {
+ int tmp = SimilarityRenameDetector
+ .nameScore(path(d), path(src));
+ if (tmp > score) {
+ best = d;
+ score = tmp;
+ }
+ }
+ }
+
+ return best;
+ }
+
+ @SuppressWarnings("unchecked")
+ private HashMap<AbbreviatedObjectId, Object> populateMap(
+ List<DiffEntry> diffEntries, ProgressMonitor pm) {
+ HashMap<AbbreviatedObjectId, Object> map = new HashMap<AbbreviatedObjectId, Object>();
+ for (DiffEntry de : diffEntries) {
+ Object old = map.put(id(de), de);
+ if (old instanceof DiffEntry) {
+ ArrayList<DiffEntry> list = new ArrayList<DiffEntry>(2);
+ list.add((DiffEntry) old);
+ list.add(de);
+ map.put(id(de), list);
+ } else if (old != null) {
+ // Must be a list of DiffEntries
+ ((List<DiffEntry>) old).add(de);
+ map.put(id(de), old);
+ }
+ pm.update(1);
+ }
+ return map;
+ }
+
+ private static String path(DiffEntry de) {
+ return de.changeType == ChangeType.DELETE ? de.oldName : de.newName;
+ }
+
+ private static FileMode mode(DiffEntry de) {
+ return de.changeType == ChangeType.DELETE ? de.oldMode : de.newMode;
+ }
+
+ private static AbbreviatedObjectId id(DiffEntry de) {
+ return de.changeType == ChangeType.DELETE ? de.oldId : de.newId;
+ }
+
static boolean sameType(FileMode a, FileMode b) {
// Files have to be of the same type in order to rename them.
// We would never want to rename a file to a gitlink, or a
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
index 6590f74..e2115f0 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
@@ -287,7 +287,7 @@ class SimilarityRenameDetector {
return mNext;
}
- private int nameScore(String a, String b) {
+ static int nameScore(String a, String b) {
int aDirLen = a.lastIndexOf("/") + 1;
int bDirLen = b.lastIndexOf("/") + 1;
@@ -349,15 +349,15 @@ class SimilarityRenameDetector {
return (int) (value >>> SCORE_SHIFT);
}
- private static int srcFile(long value) {
+ static int srcFile(long value) {
return decodeFile(((int) (value >>> BITS_PER_INDEX)) & INDEX_MASK);
}
- private static int dstFile(long value) {
+ static int dstFile(long value) {
return decodeFile(((int) value) & INDEX_MASK);
}
- private static long encode(int score, int srcIdx, int dstIdx) {
+ static long encode(int score, int srcIdx, int dstIdx) {
return (((long) score) << SCORE_SHIFT) //
| (encodeFile(srcIdx) << BITS_PER_INDEX) //
| encodeFile(dstIdx);