diff options
Diffstat (limited to 'bundles/org.eclipse.compare/plugins/org.eclipse.compare.core/src/org/eclipse/compare/internal/core/LCS.java')
-rw-r--r-- | bundles/org.eclipse.compare/plugins/org.eclipse.compare.core/src/org/eclipse/compare/internal/core/LCS.java | 454 |
1 files changed, 0 insertions, 454 deletions
diff --git a/bundles/org.eclipse.compare/plugins/org.eclipse.compare.core/src/org/eclipse/compare/internal/core/LCS.java b/bundles/org.eclipse.compare/plugins/org.eclipse.compare.core/src/org/eclipse/compare/internal/core/LCS.java deleted file mode 100644 index 26b40dde5..000000000 --- a/bundles/org.eclipse.compare/plugins/org.eclipse.compare.core/src/org/eclipse/compare/internal/core/LCS.java +++ /dev/null @@ -1,454 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2000, 2010 IBM Corporation and others. - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Public License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/legal/epl-v10.html - * - * Contributors: - * IBM Corporation - initial API and implementation - *******************************************************************************/ -package org.eclipse.compare.internal.core; - -import org.eclipse.core.runtime.OperationCanceledException; -import org.eclipse.core.runtime.SubMonitor; - - -/* Used to determine the change set responsible for each line */ -public abstract class LCS { - public static final double TOO_LONG = 10000000.0; // the value of N*M when - // to start binding the - // run time - - private static final double POW_LIMIT = 1.5; // limit the time to - // D^POW_LIMIT - - private int max_differences; // the maximum number of differences from - // each end to consider - - private int length; - - /** - * Myers' algorithm for longest common subsequence. O((M + N)D) worst case - * time, O(M + N + D^2) expected time, O(M + N) space - * (http://citeseer.ist.psu.edu/myers86ond.html) - * - * Note: Beyond implementing the algorithm as described in the paper I have - * added diagonal range compression which helps when finding the LCS of a - * very long and a very short sequence, also bound the running time to (N + - * M)^1.5 when both sequences are very long. - * - * After this method is called, the longest common subsequence is available - * by calling getResult() where result[0] is composed of - * entries from l1 and result[1] is composed of entries from l2 - * @param subMonitor - */ - public void longestCommonSubsequence(SubMonitor subMonitor) { - int length1 = getLength1(); - int length2 = getLength2(); - if (length1 == 0 || length2 == 0) { - this.length = 0; - return; - } - - this.max_differences = (length1 + length2 + 1) / 2; // ceil((N+M)/2) - if ((double) length1 * (double) length2 > TOO_LONG) { - // limit complexity to D^POW_LIMIT for long sequences - this.max_differences = (int) Math.pow(this.max_differences, POW_LIMIT - 1.0); - } - - initializeLcs(length1); - - subMonitor.beginTask(null, length1); - - /* - * The common prefixes and suffixes are always part of some LCS, include - * them now to reduce our search space - */ - int forwardBound; - int max = Math.min(length1, length2); - for (forwardBound = 0; forwardBound < max - && isRangeEqual(forwardBound, forwardBound); forwardBound++) { - setLcs(forwardBound, forwardBound); - worked(subMonitor, 1); - } - - int backBoundL1 = length1 - 1; - int backBoundL2 = length2 - 1; - - while (backBoundL1 >= forwardBound && backBoundL2 >= forwardBound - && isRangeEqual(backBoundL1, backBoundL2)) { - setLcs(backBoundL1, backBoundL2); - backBoundL1--; - backBoundL2--; - worked(subMonitor, 1); - } - - this.length = forwardBound - + length1 - - backBoundL1 - - 1 - + lcs_rec(forwardBound, backBoundL1, forwardBound, - backBoundL2, new int[2][length1 + length2 + 1], - new int[3], subMonitor); - - } - - /** - * The recursive helper function for Myers' LCS. Computes the LCS of - * l1[bottoml1 .. topl1] and l2[bottoml2 .. topl2] fills in the appropriate - * location in lcs and returns the length - * - * @param l1 The 1st sequence - * @param bottoml1 Index in the 1st sequence to start from (inclusive) - * @param topl1 Index in the 1st sequence to end on (inclusive) - * @param l2 The 2nd sequence - * @param bottoml2 Index in the 2nd sequence to start from (inclusive) - * @param topl2 Index in the 2nd sequence to end on (inclusive) - * @param V should be allocated as int[2][l1.length + l2.length + 1], used - * to store furthest reaching D-paths - * @param snake should be allocated as int[3], used to store the beginning - * x, y coordinates and the length of the latest snake traversed - * @param subMonitor - * @param lcs should be allocated as TextLine[2][l1.length], used to store - * the common points found to be part of the LCS where lcs[0] - * references lines of l1 and lcs[1] references lines of l2. - * - * @return the length of the LCS - */ - private int lcs_rec( - int bottoml1, int topl1, - int bottoml2, int topl2, - int[][] V, int[] snake, SubMonitor subMonitor) { - - // check that both sequences are non-empty - if (bottoml1 > topl1 || bottoml2 > topl2) { - return 0; - } - - int d = find_middle_snake(bottoml1, topl1, bottoml2, topl2, V, snake); - // System.out.println(snake[0] + " " + snake[1] + " " + snake[2]); - - // need to store these so we don't lose them when they're overwritten by - // the recursion - int len = snake[2]; - int startx = snake[0]; - int starty = snake[1]; - - // the middle snake is part of the LCS, store it - for (int i = 0; i < len; i++) { - setLcs(startx + i, starty + i); - worked(subMonitor, 1); - } - - if (d > 1) { - return len - + lcs_rec(bottoml1, startx - 1, bottoml2, starty - 1, V, snake, subMonitor) - + lcs_rec(startx + len, topl1, starty + len, topl2, V, snake, subMonitor); - } else if (d == 1) { - /* - * In this case the sequences differ by exactly 1 line. We have - * already saved all the lines after the difference in the for loop - * above, now we need to save all the lines before the difference. - */ - int max = Math.min(startx - bottoml1, starty - bottoml2); - for (int i = 0; i < max; i++) { - setLcs(bottoml1 + i, bottoml2 + i); - worked(subMonitor, 1); - } - return max + len; - } - - return len; - } - - private void worked(SubMonitor subMonitor, int work) { - if (subMonitor.isCanceled()) - throw new OperationCanceledException(); - subMonitor.worked(work); - } - - /** - * Helper function for Myers' LCS algorithm to find the middle snake for - * l1[bottoml1..topl1] and l2[bottoml2..topl2] The x, y coodrdinates of the - * start of the middle snake are saved in snake[0], snake[1] respectively - * and the length of the snake is saved in s[2]. - * - * @param l1 The 1st sequence - * @param bottoml1 Index in the 1st sequence to start from (inclusive) - * @param topl1 Index in the 1st sequence to end on (inclusive) - * @param l2 The 2nd sequence - * @param bottoml2 Index in the 2nd sequence to start from (inclusive) - * @param topl2 Index in the 2nd sequence to end on (inclusive) - * @param V should be allocated as int[2][l1.length + l2.length + 1], used - * to store furthest reaching D-paths - * @param snake should be allocated as int[3], used to store the beginning - * x, y coordinates and the length of the middle snake - * - * @return The number of differences (SES) between l1[bottoml1..topl1] and - * l2[bottoml2..topl2] - */ - private int find_middle_snake( - int bottoml1, int topl1, - int bottoml2, int topl2, - int[][] V, int[] snake) { - int N = topl1 - bottoml1 + 1; - int M = topl2 - bottoml2 + 1; - // System.out.println("N: " + N + " M: " + M + " bottom: " + bottoml1 + - // ", " + - // bottoml2 + " top: " + topl1 + ", " + topl2); - int delta = N - M; - boolean isEven; - if ((delta & 1) == 1) { - isEven = false; - } else { - isEven = true; - } - - int limit = Math.min(this.max_differences, (N + M + 1) / 2); // ceil((N+M)/2) - - int value_to_add_forward; // a 0 or 1 that we add to the start offset - // to make it odd/even - if ((M & 1) == 1) { - value_to_add_forward = 1; - } else { - value_to_add_forward = 0; - } - - int value_to_add_backward; - if ((N & 1) == 1) { - value_to_add_backward = 1; - } else { - value_to_add_backward = 0; - } - - int start_forward = -M; - int end_forward = N; - int start_backward = -N; - int end_backward = M; - - V[0][limit + 1] = 0; - V[1][limit - 1] = N; - for (int d = 0; d <= limit; d++) { - - int start_diag = Math.max(value_to_add_forward + start_forward, -d); - int end_diag = Math.min(end_forward, d); - value_to_add_forward = 1 - value_to_add_forward; - - // compute forward furthest reaching paths - for (int k = start_diag; k <= end_diag; k += 2) { - int x; - if (k == -d - || (k < d && V[0][limit + k - 1] < V[0][limit + k + 1])) { - x = V[0][limit + k + 1]; - } else { - x = V[0][limit + k - 1] + 1; - } - - int y = x - k; - - snake[0] = x + bottoml1; - snake[1] = y + bottoml2; - snake[2] = 0; - // System.out.println("1 x: " + x + " y: " + y + " k: " + k + " - // d: " + d ); - while (x < N && y < M - && isRangeEqual(x + bottoml1, y + bottoml2)) { - x++; - y++; - snake[2]++; - } - V[0][limit + k] = x; - // System.out.println(x + " " + V[1][limit+k -delta] + " " + k + - // " " + delta); - if (!isEven && k >= delta - d + 1 && k <= delta + d - 1 - && x >= V[1][limit + k - delta]) { - // System.out.println("Returning: " + (2*d-1)); - return 2 * d - 1; - } - - // check to see if we can cut down the diagonal range - if (x >= N && end_forward > k - 1) { - end_forward = k - 1; - } else if (y >= M) { - start_forward = k + 1; - value_to_add_forward = 0; - } - } - - start_diag = Math.max(value_to_add_backward + start_backward, -d); - end_diag = Math.min(end_backward, d); - value_to_add_backward = 1 - value_to_add_backward; - - // compute backward furthest reaching paths - for (int k = start_diag; k <= end_diag; k += 2) { - int x; - if (k == d - || (k != -d && V[1][limit + k - 1] < V[1][limit + k + 1])) { - x = V[1][limit + k - 1]; - } else { - x = V[1][limit + k + 1] - 1; - } - - int y = x - k - delta; - - snake[2] = 0; - // System.out.println("2 x: " + x + " y: " + y + " k: " + k + " - // d: " + d); - while (x > 0 && y > 0 - && isRangeEqual(x - 1 + bottoml1, y - 1 + bottoml2)) { - x--; - y--; - snake[2]++; - } - V[1][limit + k] = x; - - if (isEven && k >= -delta - d && k <= d - delta - && x <= V[0][limit + k + delta]) { - // System.out.println("Returning: " + 2*d); - snake[0] = bottoml1 + x; - snake[1] = bottoml2 + y; - - return 2 * d; - } - - // check to see if we can cut down our diagonal range - if (x <= 0) { - start_backward = k + 1; - value_to_add_backward = 0; - } else if (y <= 0 && end_backward > k - 1) { - end_backward = k - 1; - } - } - } - - /* - * computing the true LCS is too expensive, instead find the diagonal - * with the most progress and pretend a midle snake of length 0 occurs - * there. - */ - - int[] most_progress = findMostProgress(M, N, limit, V); - - snake[0] = bottoml1 + most_progress[0]; - snake[1] = bottoml2 + most_progress[1]; - snake[2] = 0; - return 5; /* - * HACK: since we didn't really finish the LCS computation - * we don't really know the length of the SES. We don't do - * anything with the result anyway, unless it's <=1. We know - * for a fact SES > 1 so 5 is as good a number as any to - * return here - */ - } - - /** - * Takes the array with furthest reaching D-paths from an LCS computation - * and returns the x,y coordinates and progress made in the middle diagonal - * among those with maximum progress, both from the front and from the back. - * - * @param M the length of the 1st sequence for which LCS is being computed - * @param N the length of the 2nd sequence for which LCS is being computed - * @param limit the number of steps made in an attempt to find the LCS from - * the front and back - * @param V the array storing the furthest reaching D-paths for the LCS - * computation - * @return The result as an array of 3 integers where result[0] is the x - * coordinate of the current location in the diagonal with the most - * progress, result[1] is the y coordinate of the current location - * in the diagonal with the most progress and result[2] is the - * amount of progress made in that diagonal - */ - private static int[] findMostProgress(int M, int N, int limit, int[][] V) { - int delta = N - M; - - int forward_start_diag; - if ((M & 1) == (limit & 1)) { - forward_start_diag = Math.max(-M, -limit); - } else { - forward_start_diag = Math.max(1 - M, -limit); - } - - int forward_end_diag = Math.min(N, limit); - - int backward_start_diag; - if ((N & 1) == (limit & 1)) { - backward_start_diag = Math.max(-N, -limit); - } else { - backward_start_diag = Math.max(1 - N, -limit); - } - - int backward_end_diag = Math.min(M, limit); - - int[][] max_progress = new int[Math.max(forward_end_diag - - forward_start_diag, backward_end_diag - backward_start_diag) / 2 + 1][3]; - int num_progress = 0; // the 1st entry is current, it is initialized - // with 0s - - // first search the forward diagonals - for (int k = forward_start_diag; k <= forward_end_diag; k += 2) { - int x = V[0][limit + k]; - int y = x - k; - if (x > N || y > M) { - continue; - } - - int progress = x + y; - if (progress > max_progress[0][2]) { - num_progress = 0; - max_progress[0][0] = x; - max_progress[0][1] = y; - max_progress[0][2] = progress; - } else if (progress == max_progress[0][2]) { - num_progress++; - max_progress[num_progress][0] = x; - max_progress[num_progress][1] = y; - max_progress[num_progress][2] = progress; - } - } - - boolean max_progress_forward = true; // initially the maximum - // progress is in the forward - // direction - - // now search the backward diagonals - for (int k = backward_start_diag; k <= backward_end_diag; k += 2) { - int x = V[1][limit + k]; - int y = x - k - delta; - if (x < 0 || y < 0) { - continue; - } - - int progress = N - x + M - y; - if (progress > max_progress[0][2]) { - num_progress = 0; - max_progress_forward = false; - max_progress[0][0] = x; - max_progress[0][1] = y; - max_progress[0][2] = progress; - } else if (progress == max_progress[0][2] && !max_progress_forward) { - num_progress++; - max_progress[num_progress][0] = x; - max_progress[num_progress][1] = y; - max_progress[num_progress][2] = progress; - } - } - - // return the middle diagonal with maximal progress. - return max_progress[num_progress / 2]; - } - - protected abstract int getLength2(); - - protected abstract int getLength1(); - - protected abstract boolean isRangeEqual(int i1, int i2); - - protected abstract void setLcs(int sl1, int sl2); - - protected abstract void initializeLcs(int lcsLength); - - public int getLength() { - return this.length; - } -} |