From ce440ff947d28209813d9a3a51d78554d0dfe036 Mon Sep 17 00:00:00 2001 From: cbrun Date: Tue, 7 May 2013 16:33:25 +0200 Subject: Avoiding Complexity Explosion This commit introduces a new mechanism to limit the CPU complexity of the match by content algorithm when there are many EObjects of the same type to match. It introduces a new interface EObjectIndex's might implement : MatchAheadOfTime. If an index implements this interface it will be asked by the Matcher, from time to time, to provide elements to be matched whereas the whole scope has not been processed yet. The default implementation in ProximityIndex checks for the number of elements kept in its storage and depending on this number decides to return elements to match ahead or not. This effectively cut the CPU complexity on models having for instance 10K elements of the same type, this has a drawback though : when this mechanism is triggered, we can no longer guarantee the match will be the "best match ever" as some elements have been matched before all the candidates where known. ProximityEObjectMatcher.NB_ELEMENTS_BETWEEN_MATCH_AHEAD = 10 000 means every 10 000 elements the matcher will ask the indexes for elements to match ahead. ProximityIndex.SEARCH_WINDOW = 1000 means a given index (which holds all the elements of a given type) will only return elements to match ahead if it has more than 1000 elements to match already. These values means the match ahead will have no impact on models which have less than 10 000 elements or which don't have more than 1000 elements of the same kind. Matching guava.uml (50K elements) goes from 50s to 11s to match the model 10 times. Change-Id: I0f9c6f536336aea1742389c991e3c672a67a154c (cherry picked from commit 58056c73a4fc086d5face4aa385115ba74fcedfe) --- .../match/eobject/ProximityEObjectMatcher.java | 161 +++++++++++++-------- .../match/eobject/internal/ByTypeIndex.java | 14 +- .../match/eobject/internal/MatchAheadOfTime.java | 31 ++++ .../match/eobject/internal/ProximityIndex.java | 19 ++- 4 files changed, 165 insertions(+), 60 deletions(-) create mode 100644 plugins/org.eclipse.emf.compare/src/org/eclipse/emf/compare/match/eobject/internal/MatchAheadOfTime.java diff --git a/plugins/org.eclipse.emf.compare/src/org/eclipse/emf/compare/match/eobject/ProximityEObjectMatcher.java b/plugins/org.eclipse.emf.compare/src/org/eclipse/emf/compare/match/eobject/ProximityEObjectMatcher.java index cd8efc4f7..d5cc7616f 100644 --- a/plugins/org.eclipse.emf.compare/src/org/eclipse/emf/compare/match/eobject/ProximityEObjectMatcher.java +++ b/plugins/org.eclipse.emf.compare/src/org/eclipse/emf/compare/match/eobject/ProximityEObjectMatcher.java @@ -12,10 +12,12 @@ package org.eclipse.emf.compare.match.eobject; import com.google.common.collect.ImmutableList; import com.google.common.collect.Iterators; +import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.Set; @@ -27,6 +29,7 @@ import org.eclipse.emf.compare.Comparison; import org.eclipse.emf.compare.Match; import org.eclipse.emf.compare.match.eobject.EObjectIndex.Side; import org.eclipse.emf.compare.match.eobject.internal.ByTypeIndex; +import org.eclipse.emf.compare.match.eobject.internal.MatchAheadOfTime; import org.eclipse.emf.ecore.EObject; /** @@ -46,6 +49,10 @@ import org.eclipse.emf.ecore.EObject; * @author Cedric Brun */ public class ProximityEObjectMatcher implements IEObjectMatcher, ScopeQuery { + /** + * Number of elements to index before a starting a match ahead step. + */ + private static final int NB_ELEMENTS_BETWEEN_MATCH_AHEAD = 10000; /** * The index which keep the EObjects. @@ -79,6 +86,7 @@ public class ProximityEObjectMatcher implements IEObjectMatcher, ScopeQuery { Monitor subMonitor = new BasicMonitor(); subMonitor.beginTask("indexing objects", 1); int nbElements = 0; + int lastSegment = 0; /* * We are iterating through the three sides of the scope at the same time so that index might apply * pre-matching strategies elements if they wish. @@ -94,17 +102,20 @@ public class ProximityEObjectMatcher implements IEObjectMatcher, ScopeQuery { if (rightEObjects.hasNext()) { EObject next = rightEObjects.next(); - nbElements++; index.index(next, Side.RIGHT); eObjectsToSide.put(next, Side.RIGHT); } if (originEObjects.hasNext()) { EObject next = originEObjects.next(); - nbElements++; index.index(next, Side.ORIGIN); eObjectsToSide.put(next, Side.ORIGIN); } + if (nbElements / NB_ELEMENTS_BETWEEN_MATCH_AHEAD > lastSegment) { + matchAheadOfTime(comparison, subMonitor); + lastSegment++; + } + } subMonitor.worked(1); @@ -114,15 +125,58 @@ public class ProximityEObjectMatcher implements IEObjectMatcher, ScopeQuery { subMonitor = new BasicMonitor(); subMonitor.beginTask("matching objects", nbElements); + matchIndexedObjects(comparison, subMonitor); + + createUnmatchesForRemainingObjects(comparison); + subMonitor.done(); + restructureMatchModel(comparison); + + } + + /** + * If the index supports it, match element ahead of time, in case of failure the elements are kept and + * will be processed again later on. + * + * @param comparison + * the current comoparison. + * @param monitor + * monitor to track progress. + */ + private void matchAheadOfTime(Comparison comparison, Monitor monitor) { + if (index instanceof MatchAheadOfTime) { + matchList(comparison, ((MatchAheadOfTime)index).getValuesToMatchAhead(Side.LEFT), false, monitor); + matchList(comparison, ((MatchAheadOfTime)index).getValuesToMatchAhead(Side.RIGHT), false, monitor); + } + } + + /** + * Match elements for real, if no match is found for an element, an object will be created to represent + * this unmatch and the element will not be processed again. + * + * @param comparison + * the current comparison. + * @param subMonitor + * monitor to track progress. + */ + private void matchIndexedObjects(Comparison comparison, Monitor subMonitor) { Iterable todo = index.getValuesStillThere(Side.LEFT); while (todo.iterator().hasNext()) { - todo = matchList(comparison, todo, subMonitor); + todo = matchList(comparison, todo, true, subMonitor); } todo = index.getValuesStillThere(Side.RIGHT); while (todo.iterator().hasNext()) { - todo = matchList(comparison, todo, subMonitor); + todo = matchList(comparison, todo, true, subMonitor); } + } + + /** + * Create all the Match objects for the remaining EObjects. + * + * @param comparison + * the current comparison. + */ + private void createUnmatchesForRemainingObjects(Comparison comparison) { for (EObject notFound : index.getValuesStillThere(Side.RIGHT)) { areMatching(comparison, null, notFound, null); } @@ -132,10 +186,6 @@ public class ProximityEObjectMatcher implements IEObjectMatcher, ScopeQuery { for (EObject notFound : index.getValuesStillThere(Side.ORIGIN)) { areMatching(comparison, null, null, notFound); } - - subMonitor.done(); - restructureMatchModel(comparison); - } /** @@ -147,53 +197,46 @@ public class ProximityEObjectMatcher implements IEObjectMatcher, ScopeQuery { * the comparison being built. * @param todoList * the list of objects to process. + * @param createUnmatches + * whether elements which have no match should trigger the creation of a Match object (meaning + * we won't try to match them afterwards) or not. * @param monitor * a monitor to track progress. * @return the list of EObjects which could not be processed for some reason. */ - private Iterable matchList(Comparison comparison, Iterable todoList, Monitor monitor) { - Set remaining = Sets.newLinkedHashSet(); - Iterator containersAndTodo = todoList.iterator(); - while (containersAndTodo.hasNext()) { - EObject next = containersAndTodo.next(); - if (!matchEObjectAndItsContainers(comparison, next)) { - remaining.add(next); + private Iterable matchList(Comparison comparison, Iterable todoList, + boolean createUnmatches, Monitor monitor) { + Set remainingResult = Sets.newLinkedHashSet(); + List requiredContainers = Lists.newArrayList(); + Iterator todo = todoList.iterator(); + while (todo.hasNext()) { + EObject next = todo.next(); + /* + * Let's first add every container which is in scope + */ + EObject container = next.eContainer(); + while (container != null && isInScope(container)) { + if (comparison.getMatch(container) == null) { + requiredContainers.add(0, container); + } + container = container.eContainer(); } } - return remaining; - } - - /** - * This method might call itself recursively going up to the containers which are in the scope. It will - * first make sure all the containers of a given EObject are already matched, and then match the EObject. - * The process might fail for some reason (for instance, a prerequisite container of RIGHT has not been - * processed yet). In that case it will return false. - * - * @param comparison - * the comparison being built. - * @param eObject - * any EObject. - * @return true if the while matching has been completed, false if not. - */ - private boolean matchEObjectAndItsContainers(Comparison comparison, EObject eObject) { - boolean completed = true; - if (comparison.getMatch(eObject) == null) { - EObject container = eObject.eContainer(); - if (container != null && isInScope(container)) { - /* - * Let's first match all the container chain, if the container has been already matched the - * call to matchEObjectAndItsContainers() will directly return true. - */ - completed = matchEObjectAndItsContainers(comparison, container); - } + Iterator containersAndTodo = Iterators.concat(requiredContainers.iterator(), todoList + .iterator()); + while (containersAndTodo.hasNext()) { + EObject next = containersAndTodo.next(); /* - * No point in trying to match the EObject if the container has not been matched already . + * At this point you need to be sure the element has not been matched in any other way before. */ - if (completed) { - completed = tryToMatch(comparison, eObject); + if (comparison.getMatch(next) == null) { + if (!tryToMatch(comparison, next, createUnmatches)) { + remainingResult.add(next); + monitor.worked(1); + } } } - return completed; + return remainingResult; } /** @@ -206,9 +249,13 @@ public class ProximityEObjectMatcher implements IEObjectMatcher, ScopeQuery { * the comparison under construction, it will be updated with the new match. * @param a * object to match. + * @param createUnmatches + * whether elements which have no match should trigger the creation of a Match object (meaning + * we won't try to match them afterwards) or not. * @return false if the conditions are not fulfilled to create the match, true otherwhise. */ - private boolean tryToMatch(Comparison comparison, EObject a) { + private boolean tryToMatch(Comparison comparison, EObject a, boolean createUnmatches) { + boolean okToMatch = false; Side aSide = eObjectsToSide.get(a); assert aSide != null; Side bSide = Side.LEFT; @@ -230,20 +277,18 @@ public class ProximityEObjectMatcher implements IEObjectMatcher, ScopeQuery { if (closests != null) { EObject lObj = closests.get(bSide); EObject aObj = closests.get(cSide); - areMatching(comparison, closests.get(Side.LEFT), closests.get(Side.RIGHT), closests - .get(Side.ORIGIN)); - if (lObj != null) { - index.remove(lObj, Side.LEFT); - } - if (aObj != null) { - index.remove(aObj, Side.ORIGIN); - } - if (a != null) { - index.remove(a, Side.RIGHT); + if (lObj != null || aObj != null) { + // we have at least one other match + areMatching(comparison, closests.get(Side.LEFT), closests.get(Side.RIGHT), closests + .get(Side.ORIGIN)); + okToMatch = true; + } else if (createUnmatches) { + areMatching(comparison, closests.get(Side.LEFT), closests.get(Side.RIGHT), closests + .get(Side.ORIGIN)); + okToMatch = true; } - return true; } - return false; + return okToMatch; } /** diff --git a/plugins/org.eclipse.emf.compare/src/org/eclipse/emf/compare/match/eobject/internal/ByTypeIndex.java b/plugins/org.eclipse.emf.compare/src/org/eclipse/emf/compare/match/eobject/internal/ByTypeIndex.java index 84e98299a..30da50009 100644 --- a/plugins/org.eclipse.emf.compare/src/org/eclipse/emf/compare/match/eobject/internal/ByTypeIndex.java +++ b/plugins/org.eclipse.emf.compare/src/org/eclipse/emf/compare/match/eobject/internal/ByTypeIndex.java @@ -31,7 +31,7 @@ import org.eclipse.emf.ecore.EObject; * * @author Cedric Brun */ -public class ByTypeIndex implements EObjectIndex { +public class ByTypeIndex implements EObjectIndex, MatchAheadOfTime { /** * All the type specific indexes, created on demand. */ @@ -140,4 +140,16 @@ public class ByTypeIndex implements EObjectIndex { typeSpecificIndex.index(eObjs, side); } + /** + * {@inheritDoc} + */ + public Iterable getValuesToMatchAhead(Side side) { + List> allLists = Lists.newArrayList(); + for (MatchAheadOfTime typeSpecificIndex : Iterables.filter(allIndexes.values(), + MatchAheadOfTime.class)) { + allLists.add(typeSpecificIndex.getValuesToMatchAhead(side)); + } + return Iterables.concat(allLists); + } + } diff --git a/plugins/org.eclipse.emf.compare/src/org/eclipse/emf/compare/match/eobject/internal/MatchAheadOfTime.java b/plugins/org.eclipse.emf.compare/src/org/eclipse/emf/compare/match/eobject/internal/MatchAheadOfTime.java new file mode 100644 index 000000000..ea704a10a --- /dev/null +++ b/plugins/org.eclipse.emf.compare/src/org/eclipse/emf/compare/match/eobject/internal/MatchAheadOfTime.java @@ -0,0 +1,31 @@ +/******************************************************************************* + * Copyright (c) 2013 Obeo. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * Obeo - initial API and implementation + *******************************************************************************/ +package org.eclipse.emf.compare.match.eobject.internal; + +import org.eclipse.emf.compare.match.eobject.EObjectIndex.Side; +import org.eclipse.emf.ecore.EObject; + +/** + * Implementators of this interface might provide elements to be matched "ahead of time". "Ahead of Time" + * means : before the whole scope got resolved. + * + * @author Cedric Brun + */ +public interface MatchAheadOfTime { + /** + * Return EObjects to match ahead of time if there are some. + * + * @param side + * the side to look for. + * @return the EObjects to match ahead of time. + */ + Iterable getValuesToMatchAhead(Side side); +} diff --git a/plugins/org.eclipse.emf.compare/src/org/eclipse/emf/compare/match/eobject/internal/ProximityIndex.java b/plugins/org.eclipse.emf.compare/src/org/eclipse/emf/compare/match/eobject/internal/ProximityIndex.java index 9a3185443..09dd6e91b 100644 --- a/plugins/org.eclipse.emf.compare/src/org/eclipse/emf/compare/match/eobject/internal/ProximityIndex.java +++ b/plugins/org.eclipse.emf.compare/src/org/eclipse/emf/compare/match/eobject/internal/ProximityIndex.java @@ -42,7 +42,13 @@ import org.eclipse.emf.ecore.EObject; * * @author Cedric Brun */ -public class ProximityIndex implements EObjectIndex { +public class ProximityIndex implements EObjectIndex, MatchAheadOfTime { + /** + * The number of elements until which the index will provide elements to match ahead of time. This is done + * to avoid algorithm complexity explosion. This is clearly a tradeoff as matching ahead of time means + * some "better match" might not be found. + */ + private static final int SEARCH_WINDOW = 1000; /** * The distance function used to compare the Objects. @@ -344,4 +350,15 @@ public class ProximityIndex implements EObjectIndex { } } + /** + * {@inheritDoc} + */ + public Iterable getValuesToMatchAhead(Side left) { + Collection indexedValues = getValuesStillThere(left); + if (indexedValues.size() > SEARCH_WINDOW) { + return indexedValues; + } + return Collections.emptyList(); + } + } -- cgit v1.2.3