Merge "Enable public access to SimilarityIndex scoring function"

author: Shawn Pearce 2015-05-26 20:49:28 +0000
committer: Gerrit Code Review @ Eclipse.org 2015-05-26 20:49:30 +0000
commit: 2ad2d85bcda42b5f2fde3c4126f07519e2c75c79 (patch)
tree: d08da381cc61eddb4d3fa0717fa73117a271638c
parent: 5635d9e1af61c054740037aa0934fca8ef34eaa4 (diff)
parent: 5e57cc95854dbf84bfafe3e61791a99b4d86746e (diff)
download: jgit-2ad2d85bcda42b5f2fde3c4126f07519e2c75c79.tar.gz
jgit-2ad2d85bcda42b5f2fde3c4126f07519e2c75c79.tar.xz
jgit-2ad2d85bcda42b5f2fde3c4126f07519e2c75c79.zip
1 files changed, 48 insertions, 4 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
index f376b8e36e..1c40d7fcbf 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
@@ -63,10 +63,13 @@ import org.eclipse.jgit.lib.ObjectStream;
  * will not exceed 1 MiB per instance. The index starts out at a smaller size
  * (closer to 2 KiB), but may grow as more distinct blocks within the scanned
  * file are discovered.
+ *
+ * @since 4.0
  */
-class SimilarityIndex {
+public class SimilarityIndex {
 	/** A special {@link TableFullException} used in place of OutOfMemoryError. */
-	private static final TableFullException TABLE_FULL_OUT_OF_MEMORY = new TableFullException();
+	public static final TableFullException
+			TABLE_FULL_OUT_OF_MEMORY = new TableFullException();
 
 	/**
 	 * Shift to apply before storing a key.
@@ -105,6 +108,26 @@ class SimilarityIndex {
 	/** {@code idHash.length == 1 << idHashBits}. */
 	private int idHashBits;
 
+	/**
+	 * Create a new similarity index for the given object
+	 *
+	 * @param obj
+	 *            the object to hash
+	 * @return similarity index for this object
+	 * @throws IOException
+	 *             file contents cannot be read from the repository.
+	 * @throws TableFullException
+	 *             object hashing overflowed the storage capacity of the
+	 *             SimilarityIndex.
+	 */
+	public static SimilarityIndex create(ObjectLoader obj) throws IOException,
+			TableFullException {
+		SimilarityIndex idx = new SimilarityIndex();
+		idx.hash(obj);
+		idx.sort();
+		return idx;
+	}
+
 	SimilarityIndex() {
 		idHashBits = 8;
 		idHash = new long[1 << idHashBits];
@@ -212,7 +235,27 @@ class SimilarityIndex {
 		Arrays.sort(idHash);
 	}
 
-	int score(SimilarityIndex dst, int maxScore) {
+	/**
+	 * Compute the similarity score between this index and another.
+	 * <p>
+	 * A region of a file is defined as a line in a text file or a fixed-size
+	 * block in a binary file. To prepare an index, each region in the file is
+	 * hashed; the values and counts of hashes are retained in a sorted table.
+	 * Define the similarity fraction F as the count of matching regions
+	 * between the two files divided by the maximum count of regions in
+	 * either file. The similarity score is F multiplied by the maxScore
+	 * constant, yielding a range [0, maxScore]. It is defined as maxScore for
+	 * the degenerate case of two empty files.
+	 * <p>
+	 * The similarity score is symmetrical; i.e. a.score(b) == b.score(a).
+	 *
+	 * @param dst
+	 *            the other index
+	 * @param maxScore
+	 *            the score representing a 100% match
+	 * @return the similarity score
+	 */
+	public int score(SimilarityIndex dst, int maxScore) {
 		long max = Math.max(hashedCnt, dst.hashedCnt);
 		if (max == 0)
 			return maxScore;
@@ -381,7 +424,8 @@ class SimilarityIndex {
 		return v & MAX_COUNT;
 	}
 
-	static class TableFullException extends Exception {
+	/** Thrown by {@code create()} when file is too large. */
+	public static class TableFullException extends Exception {
 		private static final long serialVersionUID = 1L;
 	}
 }
author	Shawn Pearce	2015-05-26 20:49:28 +0000
committer	Gerrit Code Review @ Eclipse.org	2015-05-26 20:49:30 +0000
commit	2ad2d85bcda42b5f2fde3c4126f07519e2c75c79 (patch)
tree	d08da381cc61eddb4d3fa0717fa73117a271638c
parent	5635d9e1af61c054740037aa0934fca8ef34eaa4 (diff)
parent	5e57cc95854dbf84bfafe3e61791a99b4d86746e (diff)
download	jgit-2ad2d85bcda42b5f2fde3c4126f07519e2c75c79.tar.gz jgit-2ad2d85bcda42b5f2fde3c4126f07519e2c75c79.tar.xz jgit-2ad2d85bcda42b5f2fde3c4126f07519e2c75c79.zip