diff options
Diffstat (limited to 'examples/org.eclipse.compare.examples.xml/src/org/eclipse/compare/examples/xml/GeneralMatching.java')
-rw-r--r-- | examples/org.eclipse.compare.examples.xml/src/org/eclipse/compare/examples/xml/GeneralMatching.java | 531 |
1 files changed, 0 insertions, 531 deletions
diff --git a/examples/org.eclipse.compare.examples.xml/src/org/eclipse/compare/examples/xml/GeneralMatching.java b/examples/org.eclipse.compare.examples.xml/src/org/eclipse/compare/examples/xml/GeneralMatching.java deleted file mode 100644 index 43d7bb1c6..000000000 --- a/examples/org.eclipse.compare.examples.xml/src/org/eclipse/compare/examples/xml/GeneralMatching.java +++ /dev/null @@ -1,531 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2000, 2004 IBM Corporation and others. - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Common Public License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/legal/cpl-v10.html - * - * Contributors: - * IBM Corporation - initial API and implementation - *******************************************************************************/ -package org.eclipse.compare.examples.xml; - -import java.util.ArrayList; -import java.util.Iterator; -import java.util.ListIterator; -import java.util.Vector; - -import org.eclipse.core.runtime.IProgressMonitor; - -/** This class is used to find a mapping between the nodes of two xml parse trees - * When an identifier for a specific node is known, it will be used. - * Otherwise a min-cost bipartite matching must be solved. - * This algorithm uses the algorithm described in the paper - * "X-Diff: A Fast Change Detection Algorithm for XML Documents" - */ -public class GeneralMatching extends AbstractMatching { - - HungarianMethod fH; - - boolean fUseOrdered; - String[] fOrdered; - boolean fIgnoreStartsWith; - - public GeneralMatching() { - fOrdered= null; - fUseOrdered= false; - fIgnoreStartsWith= false; - } - - public GeneralMatching(ArrayList ordered) { - if (ordered != null && !ordered.isEmpty()) { - fUseOrdered= true; - fOrdered= new String[ordered.size()]; - int i= 0; - for (Iterator iter= ordered.iterator(); iter.hasNext(); i++) { - fOrdered[i]= (String) iter.next(); - } - } else { - fUseOrdered= false; - fOrdered= null; - } - //fOrderedElements= XMLPlugin.getDefault().getOrderedElements(); - } - - //x and y have children xc_orig and yc_orig, respectively - protected int unorderedMatch( - XMLNode x, - XMLNode y, - Object[] xc_orig, - Object[] yc_orig) { - ArrayList DTMatching= new ArrayList(); //Mathing Entry in fDT_Matchings - int distance= 0; //distance - - Vector xc_vect= new Vector(); - Vector yc_vect= new Vector(); - for (int i= 0; i < xc_orig.length; i++) { - if (((XMLNode) xc_orig[i]).usesIDMAP()) { - int j; - for (j= 0; - j < yc_orig.length - && !((XMLNode) yc_orig[j]).getOrigId().equals( - ((XMLNode) xc_orig[i]).getOrigId()); - j++); - if (j < yc_orig.length) { - /* not calculating their distance and adding it to variable "distance" to save time, - * as matching of subtrees is not performed. - * but better result might be achieved if this were done. - */ - //int d= dist( (XMLNode)xc_orig[i], (XMLNode)yc_orig[j] ); - //distance += d; - //fDT[indexOfLN((XMLNode)xc_orig[i])][indexOfRN((XMLNode)yc_orig[j])]= d; - DTMatching.add( - new Match((XMLNode) xc_orig[i], (XMLNode) yc_orig[j])); - } - } else - xc_vect.add(xc_orig[i]); - } - XMLNode[] xc= (XMLNode[]) xc_vect.toArray(new XMLNode[xc_vect.size()]); - for (int j= 0; j < yc_orig.length; j++) { - if (!((XMLNode) yc_orig[j]).usesIDMAP()) - yc_vect.add(yc_orig[j]); - } - XMLNode[] yc= (XMLNode[]) yc_vect.toArray(new XMLNode[yc_vect.size()]); - if (xc.length == 0 || yc.length == 0) { - if (xc.length == 0) { - for (int j= 0; j < yc.length; j++) { - distance += countNodes(yc[j]); - } - } else { //yc.length == 0 - for (int i= 0; i < xc.length; i++) { - distance += countNodes(xc[i]); - } - } - } else { - for (int i= 0; i < xc.length; i++) { - for (int j= 0; j < yc.length; j++) { - if (fDT[indexOfLN(xc[i])][indexOfRN(yc[j])] == NO_ENTRY) - dist(xc[i], yc[j]); - } - } - - /* look for Wmin (p.11) - * xc and yc are the two partitions that have to be mapped. - * But, they may not have same number of nodes - * HungarianMethod.java solves weighted matching only on complete bipatite graphs - * We must add nodes and edges to make graph complete - */ - final int array_size= - (xc.length > yc.length) ? xc.length : yc.length; - final int array_rowsize= array_size + 1; - final int array_colsize= array_size + 2; - int[][] A= new int[array_rowsize][array_colsize]; - for (int j= 0; j < array_colsize; j++) { - A[0][j]= 0; - } - /* now: A[0] = new int[] {0,0,0, ... ,0}. This first row is not used by HungarianMethod - * (Fortran77 counts Array index from 1) - */ - for (int i= 1; i < array_rowsize; i++) { - A[i][0]= 0; - for (int j= 1; j < array_colsize - 1; j++) { - A[i][j]= -1; - } - A[i][array_colsize - 1]= 0; - } - /* now A = 0, 0, 0, ... 0,0 - * 0,-1,-1, ... -1,0 - * 0,-1,-1, ... -1,0 - * ... - * 0,-1,-1, ... -1,0 - */ - for (int i_xc= 0; i_xc < xc.length; i_xc++) { - for (int i_yc= 0; i_yc < yc.length; i_yc++) { - A[i_xc + 1][i_yc + 1]= - fDT[indexOfLN(xc[i_xc])][indexOfRN(yc[i_yc])]; - } - } - int dummyCost= 0; - /* cost of dummy nodes not associated with a node in Tree, but needed - * to have a complete bipartite graph - */ - - //set dummyCost to larger than any cost in A - if (xc.length > yc.length) { - for (int i= 1; i < array_rowsize; i++) { - for (int j= 1; j <= yc.length; j++) - if (A[i][j] > dummyCost) - dummyCost= A[i][j]; - } - } else if (xc.length < yc.length) { - for (int i= 1; i <= xc.length; i++) { - for (int j= 1; j < array_colsize - 1; j++) - if (A[i][j] > dummyCost) - dummyCost= A[i][j]; - } - } else { //xc.length == yc.length - dummyCost= Integer.MAX_VALUE - 1; - } - dummyCost += 1; - - if (xc.length > yc.length) { - for (int i= 1; i < array_rowsize; i++) { - for (int j= yc.length + 1; j < array_colsize - 1; j++) { - A[i][j]= dummyCost; - } - } - } else if (xc.length < yc.length) { - for (int j= 1; j < array_colsize - 1; j++) { - for (int i= xc.length + 1; i < array_rowsize; i++) { - A[i][j]= dummyCost; - } - } - } - - //A is built. Now perform matching - int[] Matching= new int[array_rowsize]; - int[][] A2= new int[array_rowsize][array_colsize]; - for (int i= 0; i < array_rowsize; i++) { - for (int j= 0; j < array_colsize; j++) - A2[i][j]= A[i][j]; - } - fH.solve(A2, Matching); - //now Matching contains the min-cost matching of A - - for (int m= 1; m < Matching.length; m++) { - if (A[Matching[m]][m] == dummyCost) { - if (xc.length > yc.length) { - distance += countNodes(xc[Matching[m] - 1]); - //added here - DTMatching.add(new Match(xc[Matching[m] - 1], null)); - } else if (xc.length < yc.length) { - distance += countNodes(yc[m - 1]); - //added here - DTMatching.add(new Match(null, yc[m - 1])); - } - } else { - int index_x= indexOfLN(xc[Matching[m] - 1]); - int index_y= indexOfRN(yc[m - 1]); - distance += fDT[index_x][index_y]; - if ((xc[Matching[m] - 1]) - .getSignature() - .equals((yc[m - 1]).getSignature())) - DTMatching.add( - new Match(xc[Matching[m] - 1], yc[m - 1])); - else { - DTMatching.add(new Match(xc[Matching[m] - 1], null)); - DTMatching.add(new Match(null, yc[m - 1])); - } - } - } - } - fDT[indexOfLN(x)][indexOfRN(y)]= distance; - fDT_Matchings[indexOfLN(x)][indexOfRN(y)]= DTMatching; - return distance; - } - - protected int orderedMath(XMLNode x, XMLNode y) { - //assumes x and y have children - - boolean old_isw= fIgnoreStartsWith; - fIgnoreStartsWith= true; - - //both x and y have children - Object[] xc= x.getChildren(); - Object[] yc= y.getChildren(); - - ArrayList xc_elementsAL= new ArrayList(); - ArrayList xc_attrsAL= new ArrayList(); - - ArrayList yc_elementsAL= new ArrayList(); - ArrayList yc_attrsAL= new ArrayList(); - - //find attributes and elements and put them in xc_elementsAL and xc_attrsAL, respectively - for (int i= 0; i < xc.length; i++) { - XMLNode x_i= (XMLNode) xc[i]; - if (x_i.getXMLType().equals(XMLStructureCreator.TYPE_ELEMENT)) { - xc_elementsAL.add(x_i); - } else if ( - x_i.getXMLType().equals(XMLStructureCreator.TYPE_ATTRIBUTE)) { - xc_attrsAL.add(x_i); - } - } - - //do the same for yc - for (int i= 0; i < yc.length; i++) { - XMLNode y_i= (XMLNode) yc[i]; - if (y_i.getXMLType().equals(XMLStructureCreator.TYPE_ELEMENT)) { - yc_elementsAL.add(y_i); - } else if ( - y_i.getXMLType().equals(XMLStructureCreator.TYPE_ATTRIBUTE)) { - yc_attrsAL.add(y_i); - } - } - - Object[] xc_elements= xc_elementsAL.toArray(); - Object[] yc_elements= yc_elementsAL.toArray(); - Object[] xc_attrs= xc_attrsAL.toArray(); - Object[] yc_attrs= yc_attrsAL.toArray(); - - ArrayList DTMatching= null; - //Mathing to be added to Entry in fDT_Matchings - int distance= 0; //distance to be added to entry in fDT - - //perform unordered matching on attributes - //this updates fDT and fDT_Matchings - if (xc_attrs.length > 0 || yc_attrs.length > 0) { - if (xc_attrs.length == 0) - distance += yc_attrs.length; - else if (yc_attrs.length == 0) - distance += xc_attrs.length; - else { - unorderedMatch(x, y, xc_attrs, yc_attrs); - distance += fDT[indexOfLN(x)][indexOfRN(y)]; - DTMatching= fDT_Matchings[indexOfLN(x)][indexOfRN(y)]; - } - } - if (DTMatching == null) - DTMatching= new ArrayList(); - //perform ordered matching on element children, i.e. number them in order of appearance - - /* start new */ - distance= - handleRangeDifferencer( - xc_elements, - yc_elements, - DTMatching, - distance); - /* end new */ - - /* start: Naive ordered compare /* - // int minlength= (xc_elements.length > yc_elements.length)?yc_elements.length:xc_elements.length; - // for (int i= 0; i < minlength; i++) { - // distance += dist((XMLNode) xc_elements[i], (XMLNode) yc_elements[i]); - // DTMatching.add(new Match( (XMLNode)xc_elements[i], (XMLNode)yc_elements[i])); - // } - // if (xc_elements.length > yc_elements.length) { - // for (int i= minlength; i < xc_elements.length; i++) { - // distance += countNodes((XMLNode) xc_elements[i]); - // } - // } else if (xc_elements.length < yc_elements.length) { - // for (int i= minlength; i < yc_elements.length; i++) { - // distance += countNodes((XMLNode) yc_elements[i]); - // } - // } - /* end: Naive ordered compare */ - - fIgnoreStartsWith= old_isw; - - fDT[indexOfLN(x)][indexOfRN(y)]= distance; - fDT_Matchings[indexOfLN(x)][indexOfRN(y)]= DTMatching; - return distance; - - } - - /* matches two trees according to paper "X-Diff", p. 16 */ - public void match( - XMLNode LeftTree, - XMLNode RightTree, - boolean rightTreeIsAncestor, - IProgressMonitor monitor) - throws InterruptedException { - - //if (monitor != null) monitor.beginTask("",10); - fH= new HungarianMethod(); - fNLeft= new Vector(); - //numbering LeftTree: Mapping nodes in LeftTree to numbers to be used as array indexes - fNRight= new Vector(); - //numbering RightTree: Mapping nodes in RightTree to numbers to be used as array indexes - numberNodes(LeftTree, fNLeft); - numberNodes(RightTree, fNRight); - fDT= new int[fNLeft.size()][fNRight.size()]; - fDT_Matchings= new ArrayList[fNLeft.size()][fNRight.size()]; - for (int i= 0; i < fDT.length; i++) { - fDT[i]= new int[fNRight.size()]; - for (int j= 0; j < fDT[0].length; j++) { - fDT[i][j]= NO_ENTRY; - } - } - - ArrayList NLeft= new ArrayList(); - ArrayList NRight= new ArrayList(); - findLeaves(LeftTree, NLeft); - findLeaves(RightTree, NRight); - - /* Matching Algorithm */ - /* Step 1: Compute editing distance for (LeftTree -> RightTree)*/ - while (!NLeft.isEmpty() || !NRight.isEmpty()) { - for (ListIterator itNLeft= NLeft.listIterator(); - itNLeft.hasNext(); - ) { - XMLNode x= (XMLNode) itNLeft.next(); - for (ListIterator itNRight= NRight.listIterator(); - itNRight.hasNext(); - ) { - XMLNode y= (XMLNode) itNRight.next(); - if (x.getSignature().equals(y.getSignature())) { - // System.out.println("x: "+x.getName()); - // System.out.println("y: "+y.getName()); - if (monitor != null && monitor.isCanceled()) { - // throw new OperationCanceledException(); - throw new InterruptedException(); - // return; - } - - //if signature starts with root>project - //do not calculate dist - - //if signature is root>project - //do ordered search on children - //do unordered search on childrenb - - dist(x, y); - } - } - } - ArrayList NLeft_new= new ArrayList(); - ArrayList NRight_new= new ArrayList(); - for (ListIterator itNLeft= NLeft.listIterator(); - itNLeft.hasNext(); - ) { - XMLNode node= (XMLNode) itNLeft.next(); - if (node.getParent() != null - && !NLeft_new.contains(node.getParent())) - NLeft_new.add(node.getParent()); - } - for (ListIterator itNRight= NRight.listIterator(); - itNRight.hasNext(); - ) { - XMLNode node= (XMLNode) itNRight.next(); - if (node.getParent() != null - && !NRight_new.contains(node.getParent())) - NRight_new.add(node.getParent()); - } - NLeft= NLeft_new; - NRight= NRight_new; - } - //end of Step1 - /* Step 2: mark matchings on LeftTree and RightTree */ - fMatches= new Vector(); - if (!LeftTree.getSignature().equals(RightTree.getSignature())) { - //matching is empty - } else { - fMatches.add(new Match(LeftTree, RightTree)); - for (int i_M= 0; i_M < fMatches.size(); i_M++) { - Match m= (Match) fMatches.elementAt(i_M); - if (!isLeaf(m.fx) && !isLeaf(m.fy)) { - // if (fDT_Matchings[ indexOfLN(m.fx) ][ indexOfRN(m.fy) ] == null) - // System.out.println("Error: ID not unique for " + m.fx.getId()); - // else - // fMatches.addAll(fDT_Matchings[ indexOfLN(m.fx) ][ indexOfRN(m.fy) ]); - if (fDT_Matchings[indexOfLN(m.fx)][indexOfRN(m.fy)] - != null) - fMatches.addAll( - fDT_Matchings[indexOfLN(m.fx)][indexOfRN(m.fy)]); - } - } - } - //end of Step2 - /* Renumber Id of Nodes to follow Matches. Or for ancestor, copy over Id to ancestor */ - if (rightTreeIsAncestor) { - for (ListIterator it_M= fMatches.listIterator(); it_M.hasNext();) { - Match m= (Match) it_M.next(); - if (m.fx != null && m.fy != null) - m.fy.setId(m.fx.getId()); - } - } else { - int newId= 0; - for (ListIterator it_M= fMatches.listIterator(); - it_M.hasNext(); - newId++) { - Match m= (Match) it_M.next(); - if (m.fx != null) - m.fx.setId(Integer.toString(newId)); - if (m.fy != null) - m.fy.setId(Integer.toString(newId)); - // System.out.println("Matching: "+ ((m.fx != null)?m.fx.getOrigId():"null")+" -> "+((m.fx != null)?m.fx.getId():"null")+" , "+((m.fy != null)?m.fy.getOrigId():"null")+" -> "+((m.fy != null)?m.fy.getId():"null")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ - } - } - //if (monitor != null) monitor.done(); - } - - protected int handleXandYnotLeaves(XMLNode x, XMLNode y) { - int ret= NO_ENTRY; - Object[] xc_orig= x.getChildren(); - Object[] yc_orig= y.getChildren(); - - /* handle ordered entries */ - if (fUseOrdered) { - boolean starts_with_sig= false; - boolean equals_sig= false; - String x_sig= x.getSignature(); - String y_sig= y.getSignature(); - - int i_ordered; - - if (!fIgnoreStartsWith) { - /* Normal case when algorithm runs. - * Algorithm runs bottom up from leaves to root. - * check x_sig.startsWith(fOrdered[i_ordered]) || y_sig.startsWith(fOrdered[i_ordered]) - * because if this is the case and - * !(x_sig.equals(fOrdered[j_ordered]+SIGN_ELEMENT) && y_sig.equals(fOrdered[j_ordered]+SIGN_ELEMENT)) - * i.e. the nodes are not marked for an ordered compare but x and/or y has an ancestor that is, - * then nodes x and/or y will be handled by that ancestor in orderedMatch(), which is a top-down algorithm. - * Thus, we exit the procedure dist() if this is the case. - */ - for (i_ordered= 0; i_ordered < fOrdered.length; i_ordered++) { - if (x_sig.startsWith(fOrdered[i_ordered]) - || y_sig.startsWith(fOrdered[i_ordered])) { - starts_with_sig= true; - if (x_sig.equals(y_sig)) { - for (int j_ordered= i_ordered; - j_ordered < fOrdered.length; - j_ordered++) { - if (x_sig - .equals( - fOrdered[j_ordered] + SIGN_ELEMENT)) { - equals_sig= true; - break; - } - break; - } - } - } - } - - if (starts_with_sig) { - if (equals_sig) { - return orderedMath(x, y); - } else { - return ret; - } - } - - } else { - /* when inside orderedMatch(x, y), algorithm runs recursively from a node to the leaves of the - * subtree rooted at this node. - * In this case we do not check x_sig.startsWith(fOrdered[i_ordered]) || y_sig.startsWith(fOrdered[i_ordered]) - */ - if (x_sig.equals(y_sig)) { - for (i_ordered= 0; - i_ordered < fOrdered.length; - i_ordered++) { - if (x_sig.equals(fOrdered[i_ordered] + SIGN_ELEMENT)) { - equals_sig= true; - break; - } - } - } - - if (equals_sig) { - return orderedMath(x, y); - } - } - - } - /* end of handle ordered entries */ - - return unorderedMatch(x, y, xc_orig, yc_orig); - } - -} |