Skip to main content
aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Wolf2019-08-21 17:32:30 -0400
committerLars Vogel2019-09-10 12:46:01 -0400
commit340d36aec95f8b2cf3fccdb04c63cbaf8ae9c5a8 (patch)
tree79eef58239f2e88003c275925da9f7781433bcdb
parent0a4f6ad2be2ec08cddae6952c33bd73c3c7f9b13 (diff)
downloadeclipse.platform.text-340d36aec95f8b2cf3fccdb04c63cbaf8ae9c5a8.tar.gz
eclipse.platform.text-340d36aec95f8b2cf3fccdb04c63cbaf8ae9c5a8.tar.xz
eclipse.platform.text-340d36aec95f8b2cf3fccdb04c63cbaf8ae9c5a8.zip
Bug 550438 - MultiStringMatcher (leftmost-longest): exit earlier
We may return an existing sub-match right away on a failover if its offset is smaller than the start offset of the path we switched to. Store the node depth in the trie nodes; this gives us access to this information. New tests that verify where in the text searched the algorithm stops searching. scan003 is the test that motivated this change. The new code stops scanning at index 5, while the old code did this check only on the final match on the new path at index 10. Also slightly simplify the implementation of indexOf() by removing the 'failover' flag. Change-Id: I3066680450a2d5027703e06afba7833b81d12127 Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>
-rw-r--r--org.eclipse.text.tests/META-INF/MANIFEST.MF2
-rw-r--r--org.eclipse.text.tests/pom.xml2
-rw-r--r--org.eclipse.text.tests/src/org/eclipse/text/tests/MultiStringMatcherTest.java74
-rw-r--r--org.eclipse.text/META-INF/MANIFEST.MF2
-rw-r--r--org.eclipse.text/pom.xml2
-rw-r--r--org.eclipse.text/src/org/eclipse/jface/text/MultiStringMatcher.java57
6 files changed, 108 insertions, 31 deletions
diff --git a/org.eclipse.text.tests/META-INF/MANIFEST.MF b/org.eclipse.text.tests/META-INF/MANIFEST.MF
index 4c085e469..f7cfc57cb 100644
--- a/org.eclipse.text.tests/META-INF/MANIFEST.MF
+++ b/org.eclipse.text.tests/META-INF/MANIFEST.MF
@@ -2,7 +2,7 @@ Manifest-Version: 1.0
Bundle-ManifestVersion: 2
Bundle-Name: %Plugin.name
Bundle-SymbolicName: org.eclipse.text.tests
-Bundle-Version: 3.12.300.qualifier
+Bundle-Version: 3.12.400.qualifier
Bundle-Vendor: %Plugin.providerName
Bundle-Localization: plugin
Export-Package:
diff --git a/org.eclipse.text.tests/pom.xml b/org.eclipse.text.tests/pom.xml
index 93b47d64b..8fd65f6d3 100644
--- a/org.eclipse.text.tests/pom.xml
+++ b/org.eclipse.text.tests/pom.xml
@@ -19,7 +19,7 @@
</parent>
<groupId>org.eclipse.text</groupId>
<artifactId>org.eclipse.text.tests</artifactId>
- <version>3.12.300-SNAPSHOT</version>
+ <version>3.12.400-SNAPSHOT</version>
<packaging>eclipse-test-plugin</packaging>
<properties>
<testSuite>${project.artifactId}</testSuite>
diff --git a/org.eclipse.text.tests/src/org/eclipse/text/tests/MultiStringMatcherTest.java b/org.eclipse.text.tests/src/org/eclipse/text/tests/MultiStringMatcherTest.java
index b50f57725..54a5bbeed 100644
--- a/org.eclipse.text.tests/src/org/eclipse/text/tests/MultiStringMatcherTest.java
+++ b/org.eclipse.text.tests/src/org/eclipse/text/tests/MultiStringMatcherTest.java
@@ -30,11 +30,17 @@ public class MultiStringMatcherTest {
public ExpectedException thrown = ExpectedException.none();
private static Match run(String text, String... needles) {
- return MultiStringMatcher.indexOf(text, 0, needles);
+ return run(text, 0, needles);
}
private static Match run(String text, int offset, String... needles) {
- return MultiStringMatcher.indexOf(text, offset, needles);
+ return run(new TestCharSequence(text), offset, needles);
+ }
+
+ private static Match run(TestCharSequence text, int offset, String... needles) {
+ Match result = MultiStringMatcher.indexOf(text, offset, needles);
+ assertEquals("Algorithm backtracked", 0, text.getBackTrack());
+ return result;
}
private static void test(Match m, String expected, int index) {
@@ -449,4 +455,68 @@ public class MultiStringMatcherTest {
thrown.expect(IllegalStateException.class);
b.build();
}
+
+ @Test
+ public void scan001() throws Exception {
+ TestCharSequence text = new TestCharSequence("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
+ Match m = run(text, 0, "x", "xx", "xxx", "xxxx");
+ test(m, "xxxx", 0);
+ assertEquals("Scanned too far", 3, text.getLastIndex());
+ }
+
+ @Test
+ public void scan002() throws Exception {
+ TestCharSequence text = new TestCharSequence("ddcababababababcabxdd");
+ Match m = run(text, 0, "ca", "cabx", "ababc");
+ test(m, "ca", 2);
+ assertEquals("Scanned too far", 5, text.getLastIndex());
+ }
+
+ @Test
+ public void scan003() throws Exception {
+ TestCharSequence text = new TestCharSequence("ddcabarbarazz");
+ Match m = run(text, 0, "a", "cabby", "barbara");
+ test(m, "a", 3);
+ assertEquals("Scanned too far", 5, text.getLastIndex());
+ }
+
+ private static class TestCharSequence implements CharSequence {
+
+ private final String value;
+
+ private int lastIndex = -1;
+
+ private int backtrack = 0;
+
+ public TestCharSequence(String value) {
+ this.value = value;
+ }
+
+ @Override
+ public int length() {
+ return value.length();
+ }
+
+ @Override
+ public char charAt(int index) {
+ if (index < lastIndex) {
+ backtrack = Math.min(backtrack, index - lastIndex);
+ }
+ lastIndex = index;
+ return value.charAt(index);
+ }
+
+ @Override
+ public CharSequence subSequence(int start, int end) {
+ throw new UnsupportedOperationException();
+ }
+
+ public int getLastIndex() {
+ return lastIndex;
+ }
+
+ public int getBackTrack() {
+ return backtrack;
+ }
+ }
}
diff --git a/org.eclipse.text/META-INF/MANIFEST.MF b/org.eclipse.text/META-INF/MANIFEST.MF
index 4ee82ebb2..0f792c4f9 100644
--- a/org.eclipse.text/META-INF/MANIFEST.MF
+++ b/org.eclipse.text/META-INF/MANIFEST.MF
@@ -2,7 +2,7 @@ Manifest-Version: 1.0
Bundle-ManifestVersion: 2
Bundle-Name: %pluginName
Bundle-SymbolicName: org.eclipse.text
-Bundle-Version: 3.9.0.qualifier
+Bundle-Version: 3.9.100.qualifier
Bundle-Vendor: %providerName
Bundle-Localization: plugin
Export-Package:
diff --git a/org.eclipse.text/pom.xml b/org.eclipse.text/pom.xml
index d3c48bb9d..e019dabe6 100644
--- a/org.eclipse.text/pom.xml
+++ b/org.eclipse.text/pom.xml
@@ -18,6 +18,6 @@
</parent>
<groupId>org.eclipse.text</groupId>
<artifactId>org.eclipse.text</artifactId>
- <version>3.9.0-SNAPSHOT</version>
+ <version>3.9.100-SNAPSHOT</version>
<packaging>eclipse-plugin</packaging>
</project>
diff --git a/org.eclipse.text/src/org/eclipse/jface/text/MultiStringMatcher.java b/org.eclipse.text/src/org/eclipse/jface/text/MultiStringMatcher.java
index 68128e1d2..a98d82d44 100644
--- a/org.eclipse.text/src/org/eclipse/jface/text/MultiStringMatcher.java
+++ b/org.eclipse.text/src/org/eclipse/jface/text/MultiStringMatcher.java
@@ -194,6 +194,12 @@ public class MultiStringMatcher {
Node output;
+ final int depth;
+
+ Node(int depth) {
+ this.depth= depth;
+ }
+
Node next(Character c) {
return children == null ? null : children.get(c);
}
@@ -202,7 +208,7 @@ public class MultiStringMatcher {
if (children == null) {
children= new HashMap<>();
}
- return children.computeIfAbsent(Character.valueOf(c), key -> new Node());
+ return children.computeIfAbsent(Character.valueOf(c), key -> new Node(depth + 1));
}
boolean hasChildren() {
@@ -211,13 +217,14 @@ public class MultiStringMatcher {
@Override
public String toString() {
- return "Match: " + (match == null ? "null" : '>' + match + '<') //$NON-NLS-1$ //$NON-NLS-2$
- + " Children: " + (children == null ? "<none>" : children.keySet().stream().map(c -> c.toString()).collect(Collectors.joining(", "))); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
+ return "[depth=" + depth + ", match=" + match //$NON-NLS-1$ //$NON-NLS-2$
+ + ", children=" + (children == null ? "<none>" : children.keySet().stream().map(c -> c.toString()).collect(Collectors.joining(", "))) //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
+ + ']';
}
}
/** Root node of the trie. */
- private final Node root= new Node() {
+ private final Node root= new Node(0) {
@Override
Node next(Character c) {
// Implements the sentinel loop on the root node for all non-matching characters.
@@ -251,7 +258,7 @@ public class MultiStringMatcher {
// s, r, and state are kept as in the paper.
List<Node> queue= new LinkedList<>();
for (Node s : root.children.values()) {
- if (s.children != null) {
+ if (s.hasChildren()) {
// No need to queue nodes without children since we don't do anything
// with them anyway.
queue.add(s);
@@ -263,7 +270,7 @@ public class MultiStringMatcher {
for (Map.Entry<Character, Node> entry : r.children.entrySet()) {
Character c= entry.getKey();
Node s= entry.getValue();
- if (s.children != null) {
+ if (s.hasChildren()) {
queue.add(s);
}
Node state= r.fail;
@@ -321,41 +328,41 @@ public class MultiStringMatcher {
int textEnd= text.length();
Match primaryMatch= null;
Match subMatch= null;
- boolean failover= false;
Node node= root;
for (int i= offset; i < textEnd; i++) {
Character c= Character.valueOf(text.charAt(i));
Node next= node.next(c);
if (next == null) {
- // Fell off the trie.
+ // Can't continue on this path.
if (primaryMatch != null) {
// Return primary match because any other match must have a higher offset.
return primaryMatch;
}
- // Search for other trie to change to.
+ // Search for another path to continue matching.
do {
node= node.fail;
} while ((next= node.next(c)) == null);
- failover= (node != root);
- if (!failover && subMatch != null) {
- // We fell of the trie and could not switch to another. Return best sub-match
- // if possible.
- return subMatch;
+ if (subMatch != null) {
+ if (next == root) {
+ // We fell off the trie and could not switch to another. Return the best
+ // sub-match.
+ return subMatch;
+ } else if (subMatch.getOffset() < i - node.depth) {
+ // The new path starts at i - node.depth == i - next.depth + 1, so if a
+ // sub-match is earlier, we may return it. Any primary match on this path
+ // or on any other path we might switch to later on will have a higher
+ // offset, and so will any sub-matches we might discover on these paths.
+ return subMatch;
+ }
}
}
node= next;
if (node.match != null) {
- int newOffset= i - node.match.length() + 1;
- // On a failover trie the sub match can be better.
- // And if it is return it because nothing better will follow.
- if (failover && subMatch != null && subMatch.getOffset() < newOffset) {
- return subMatch;
- }
// Any new primary match is better because all have the same offset but any new one
- // must be longer.
- primaryMatch= new MatchResult(node.match, newOffset);
+ // must be longer. An existing sub-match from a previous path is checked above.
+ primaryMatch= new MatchResult(node.match, i - node.depth + 1);
if (!node.hasChildren()) {
- // We will fall off the trie on the next character, so we can return right here
+ // We will fall off the trie on the next character, so we can return right here.
return primaryMatch;
}
}
@@ -364,10 +371,10 @@ public class MultiStringMatcher {
if (primaryMatch == null) {
Node out= node.output;
if (out != null) {
- int newOffset= i - out.match.length() + 1;
+ int newOffset= i - out.depth + 1;
if (subMatch == null
|| newOffset < subMatch.getOffset()
- || (newOffset == subMatch.getOffset() && out.match.length() > subMatch.getText().length())) {
+ || (newOffset == subMatch.getOffset() && out.depth > subMatch.getText().length())) {
subMatch= new MatchResult(out.match, newOffset);
}
}

Back to the top