Skip to main content
aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorThomas Wolf2019-08-17 10:53:30 +0000
committerPaul Pazderski2019-08-24 12:58:34 +0000
commite50ee43b2cf76c97c833fe400539c00f9ebe8fc7 (patch)
tree9e1c5b8cd936580c9e2e1a698f58e662c343937b /org.eclipse.text.tests
parentc58c96bcb725d38350343f31c7a8988853f19709 (diff)
downloadeclipse.platform.text-e50ee43b2cf76c97c833fe400539c00f9ebe8fc7.tar.gz
eclipse.platform.text-e50ee43b2cf76c97c833fe400539c00f9ebe8fc7.tar.xz
eclipse.platform.text-e50ee43b2cf76c97c833fe400539c00f9ebe8fc7.zip
Bug 545252 - ConfigurableLineTracker: use the Aho-Corasick algorithmI20190826-0640I20190826-0415
The ConfigurableLineTracker had quadratic performance characteristics, making line-end detection even only with the standard line terminators extremely slow already on moderately sized sources. Add an efficient MultiStringMatcher using the Aho-Corasick algorithm to efficiently find any of a fixed set of strings in a text. The algorithm is a well-described efficient general-purpose multi-string matching algorithm based on a trie. Its main search loop is very simple; the more tricky bits are in the construction of the auxiliary links in the trie. The algorithm as described in 1975 returns all matches including overlaps within a text, but it can be adapted fairly easily to support the "leftmost longest match" semantics needed by ConfigurableLineTracker. The Aho-Corasick algorithm has a complexity of O(n + m + z), where n is the text length, m the sum of the length of patterns, and z the number of matches. For line end matching, this is roughly O(n + L), where L is the number of lines. Note that it can produce a quadratic number of matches on degenerate input (patterns "x", "xx", "xxx", and so on, and a text consisting only of "x"s). Adapt ConfigurableLineTracker to use the new MultiStringMatcher. The matcher provides a find() operation to find all (possibly overlapping) occurrences of the pattern strings; this is the normal way the Aho-Corasick algorithm works. An indexOf() method provides a way to efficiently find only the leftmost longest match. Since this new matcher is generally useful and fast, provide a builder interface through which pattern strings can be added iteratively before matching. Change-Id: I3e66482cfca271248caaad79157f843bc2c819d7 Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch> Also-by: Paul Pazderski <paul-eclipse@ppazderski.de>
Diffstat (limited to 'org.eclipse.text.tests')
-rw-r--r--org.eclipse.text.tests/src/org/eclipse/text/tests/ConfigurableLineTrackerTest.java96
-rw-r--r--org.eclipse.text.tests/src/org/eclipse/text/tests/EclipseTextTestSuite.java2
-rw-r--r--org.eclipse.text.tests/src/org/eclipse/text/tests/MultiStringMatcherTest.java452
-rw-r--r--org.eclipse.text.tests/src/org/eclipse/text/tests/TextUtilitiesTest.java3
4 files changed, 552 insertions, 1 deletions
diff --git a/org.eclipse.text.tests/src/org/eclipse/text/tests/ConfigurableLineTrackerTest.java b/org.eclipse.text.tests/src/org/eclipse/text/tests/ConfigurableLineTrackerTest.java
new file mode 100644
index 00000000000..fe6f597485f
--- /dev/null
+++ b/org.eclipse.text.tests/src/org/eclipse/text/tests/ConfigurableLineTrackerTest.java
@@ -0,0 +1,96 @@
+/*******************************************************************************
+ * Copyright (c) 2019 Thomas Wolf and others.
+ *
+ * This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License 2.0
+ * which accompanies this distribution, and is available at
+ * https://www.eclipse.org/legal/epl-2.0/
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *******************************************************************************/
+package org.eclipse.text.tests;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import org.eclipse.jface.text.ConfigurableLineTracker;
+import org.eclipse.jface.text.DefaultLineTracker;
+import org.eclipse.jface.text.GapTextStore;
+
+public class ConfigurableLineTrackerTest extends AbstractLineTrackerTest {
+
+ @Before
+ public void setUp() {
+ fText= new GapTextStore();
+ }
+
+ @After
+ public void tearDown() {
+ fTracker= null;
+ fText= null;
+ }
+
+ /**
+ * Expected length of the tracked delimiter. First entry is length of first
+ * delimiter and so on.
+ */
+ private int[] lineEndLengths;
+
+ @Override
+ protected int getLineOffset(int line, int[] lines) {
+ int offset= 0;
+ for (int i= 0; i < line; i++) {
+ offset += (lines[i] + lineEndLengths[i]);
+ }
+ return offset;
+ }
+
+ private void setLegalDelimiters(String... delimiters) {
+ fTracker = new ConfigurableLineTracker(delimiters);
+ }
+
+ @Test
+ public void testLongestMatch() throws Exception {
+ setLegalDelimiters(DefaultLineTracker.DELIMITERS);
+ set("xxxx\r\nxx");
+ lineEndLengths = new int[] {2};
+ checkLines(new int[] {4, 2});
+ }
+
+ @Test
+ public void testMixedDefaultDelimiter() throws Exception {
+ setLegalDelimiters(DefaultLineTracker.DELIMITERS);
+ set("1234\r\n123\r1234\n1234567\r");
+ lineEndLengths = new int[] { 2, 1, 1, 1 };
+ checkLines(new int[] { 4, 3, 4, 7, 0 });
+ set("first\r\r\nthird");
+ lineEndLengths = new int[] { 1, 2 };
+ checkLines(new int[] { 5, 0, 5 });
+ }
+
+ @Test
+ public void testSingleDelimiter() throws Exception {
+ setLegalDelimiters("\n");
+ set("xxxx\nxx");
+ lineEndLengths = new int[] { 1 };
+ checkLines(new int[] { 4, 2 });
+ }
+
+ @Test
+ public void testAlternativeDelimiters1() throws Exception {
+ // create a line tracker with "show whitespace" characters as delimiter
+ setLegalDelimiters("\u00a4", "\u00b6", "\u00a4\u00b6");
+ set("xxxx\u00a4\u00b6xx");
+ lineEndLengths = new int[] { 2 };
+ checkLines(new int[] { 4, 2 });
+ }
+
+ @Test
+ public void testAlternativeDelimiters2() throws Exception {
+ setLegalDelimiters("{NewLine}", "[NewLine]");
+ set("A line{NewLine}AnotherLine[NewLine}A third line[newline]End");
+ lineEndLengths = new int[] { 9 };
+ checkLines(new int[] { 6, 44 });
+ }
+}
diff --git a/org.eclipse.text.tests/src/org/eclipse/text/tests/EclipseTextTestSuite.java b/org.eclipse.text.tests/src/org/eclipse/text/tests/EclipseTextTestSuite.java
index 8e2650685f6..be13f581481 100644
--- a/org.eclipse.text.tests/src/org/eclipse/text/tests/EclipseTextTestSuite.java
+++ b/org.eclipse.text.tests/src/org/eclipse/text/tests/EclipseTextTestSuite.java
@@ -29,6 +29,8 @@ import org.eclipse.text.tests.templates.TemplatesTestSuite;
*/
@RunWith(Suite.class)
@SuiteClasses({
+ MultiStringMatcherTest.class,
+ ConfigurableLineTrackerTest.class,
LineTrackerTest4.class,
DocumentExtensionTest.class,
LineTrackerTest3.class,
diff --git a/org.eclipse.text.tests/src/org/eclipse/text/tests/MultiStringMatcherTest.java b/org.eclipse.text.tests/src/org/eclipse/text/tests/MultiStringMatcherTest.java
new file mode 100644
index 00000000000..b50f57725ef
--- /dev/null
+++ b/org.eclipse.text.tests/src/org/eclipse/text/tests/MultiStringMatcherTest.java
@@ -0,0 +1,452 @@
+/*******************************************************************************
+ * Copyright (c) 2019 Thomas Wolf and others.
+ *
+ * This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License 2.0
+ * which accompanies this distribution, and is available at
+ * https://www.eclipse.org/legal/epl-2.0/
+ *
+ * SPDX-License-Identifier: EPL-2.0
+ *******************************************************************************/
+package org.eclipse.text.tests;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertNull;
+
+import java.util.Collections;
+import java.util.List;
+
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+
+import org.eclipse.jface.text.MultiStringMatcher;
+import org.eclipse.jface.text.MultiStringMatcher.Match;
+
+public class MultiStringMatcherTest {
+
+ @Rule
+ public ExpectedException thrown = ExpectedException.none();
+
+ private static Match run(String text, String... needles) {
+ return MultiStringMatcher.indexOf(text, 0, needles);
+ }
+
+ private static Match run(String text, int offset, String... needles) {
+ return MultiStringMatcher.indexOf(text, offset, needles);
+ }
+
+ private static void test(Match m, String expected, int index) {
+ assertNotNull("No match", m);
+ assertEquals("Unexpected match", expected, m.getText());
+ assertEquals("Unexpected index", index, m.getOffset());
+ }
+
+ private static void testList(List<Match> matches, String expected) {
+ Collections.sort(matches, (a, b) -> {
+ int cmp = Integer.compare(a.getOffset(), b.getOffset());
+ if (cmp != 0) {
+ return cmp;
+ }
+ return Integer.compare(a.getText().length(), b.getText().length());
+ });
+ assertEquals("Unexpected results", expected, matches.toString());
+ }
+
+ @Test
+ public void test001() throws Exception {
+ Match m = run("dcccacabcccabcc", "ab", "cab");
+ test(m, "cab", 5);
+ }
+
+ @Test
+ public void test002() throws Exception {
+ Match m = run("dcccacabcccabcc", "ab", "abc");
+ test(m, "abc", 6);
+ }
+
+ @Test
+ public void test003() throws Exception {
+ Match m = run("dcccacabcccabcc", "ab", "cxb");
+ test(m, "ab", 6);
+ }
+
+ @Test
+ public void test004() throws Exception {
+ Match m = run("dcccacabcccabcc", "abc", "cabx");
+ test(m, "abc", 6);
+ }
+
+ @Test
+ public void test005() throws Exception {
+ Match m = run("dacabddd", "ac", "cab");
+ test(m, "ac", 1);
+ }
+
+ @Test
+ public void test006() throws Exception {
+ Match m = run("dacabddd", "aca", "cab");
+ test(m, "aca", 1);
+ }
+
+ @Test
+ public void test007() throws Exception {
+ Match m = run("dacabddd", "acab", "cab");
+ test(m, "acab", 1);
+ }
+
+ @Test
+ public void test008() throws Exception {
+ Match m = run("ddddddcac", "ac", "cab");
+ test(m, "ac", 7);
+ }
+
+ @Test
+ public void test009() throws Exception {
+ Match m = run("dddddcacddd", "cacx", "ac");
+ test(m, "ac", 6);
+ }
+
+ @Test
+ public void test010() throws Exception {
+ Match m = run("ddddddcac", "ac", "cac");
+ test(m, "cac", 6);
+ }
+
+ @Test
+ public void test011() throws Exception {
+ Match m = run("a", "a", "b", "ab");
+ test(m, "a", 0);
+ }
+
+ @Test
+ public void test012() throws Exception {
+ Match m = run("b", "a", "b", "ab");
+ test(m, "b", 0);
+ }
+
+ @Test
+ public void test013() throws Exception {
+ Match m = run("ab", "a", "b", "ab");
+ test(m, "ab", 0);
+ }
+
+ @Test
+ public void test014() throws Exception {
+ Match m = run("", "a", "b", "ab");
+ assertNull("Expected no match", m);
+ }
+
+ @Test
+ public void test015() throws Exception {
+ Match m = run("dddca", "ac", "cac", "ab");
+ assertNull("Expected no match", m);
+ }
+
+ @Test
+ public void test016() throws Exception {
+ Match m = run("ab", "ab", "b");
+ test(m, "ab", 0);
+ }
+
+ @Test
+ public void test017() throws Exception {
+ Match m = run("ushers", "he", "she", "his", "hers");
+ test(m, "she", 1);
+ }
+
+ @Test
+ public void test018() throws Exception {
+ Match m = run("dddhisheddd", "he", "she", "his", "hers");
+ test(m, "his", 3);
+ }
+
+ @Test
+ public void test019() throws Exception {
+ Match m = run("sotat", "at", "art", "oars", "soar");
+ test(m, "at", 3);
+ }
+
+ @Test
+ public void test020() throws Exception {
+ Match m = run("xxx", "x", "xx", "xxx");
+ test(m, "xxx", 0);
+ }
+
+ @Test
+ public void test021() throws Exception {
+ Match m = run("xx", "x", "xx", "xxx");
+ test(m, "xx", 0);
+ }
+
+ @Test
+ public void test022() throws Exception {
+ Match m = run("x", "x", "xx", "xxx");
+ test(m, "x", 0);
+ }
+
+ @Test
+ public void test023() throws Exception {
+ Match m = run("Lorem\r\nIpsum", "\n", "\r\n", "\r");
+ test(m, "\r\n", 5);
+ }
+
+ @Test
+ public void test024() throws Exception {
+ Match m = run("dcccacabcccabcc", "ab", "abcd");
+ test(m, "ab", 6);
+ }
+
+ @Test
+ public void test025() throws Exception {
+ Match m = run("dcccacabcccabcc", "abcd", "bccc");
+ test(m, "bccc", 7);
+ }
+
+ @Test
+ public void test026() throws Exception {
+ Match m = run("xxx", 1, "x", "xx", "xxx");
+ test(m, "xx", 1);
+ }
+
+ @Test
+ public void test027() throws Exception {
+ Match m = run("xxx", 2, "x", "xx", "xxx");
+ test(m, "x", 2);
+ }
+
+ @Test
+ public void test028() throws Exception {
+ Match m = run("dddhisheddd", 7, "he", "she", "his", "hers");
+ assertNull("Expected no match", m);
+ }
+
+ @Test
+ public void test029() throws Exception {
+ Match m = run("Lorem001Ipsum", "1", "01", "0");
+ test(m, "0", 5);
+ }
+
+ @Test
+ public void test030() throws Exception {
+ Match m = run("Lorem01Ipsum", "1", "01", "0");
+ test(m, "01", 5);
+ }
+
+ @Test
+ public void test031() throws Exception {
+ Match m = run("ddcababababababcabxdd", "ca", "cabx", "ababc");
+ test(m, "ca", 2);
+ }
+
+ @Test
+ public void test032() throws Exception {
+ Match m = run("ddcababcabxdd", "ca", "cabx", "cababc", "ababc");
+ test(m, "cababc", 2);
+ }
+
+ @Test
+ public void test033() throws Exception {
+ Match m = run("ddcababcdd", "ca", "cabx", "cababc", "ababc");
+ test(m, "cababc", 2);
+ }
+
+ @Test
+ public void test034() throws Exception {
+ Match m = run("ddcababcdd", "cababx", "ababc");
+ test(m, "ababc", 3);
+ }
+
+ @Test
+ public void test035() throws Exception {
+ Match m = run("ddcababcdd", "cababx", "abab", "ababc");
+ test(m, "ababc", 3);
+ }
+
+ @Test
+ public void test036() throws Exception {
+ Match m = run("ddcabababcdd", "ab", "abab", "cabababcy", "cabababcyy");
+ test(m, "abab", 3);
+ }
+
+ @Test
+ public void test037() throws Exception {
+ Match m = run("ddcabababcdd", "ab", "abab", "cabababc", "cabababcyy");
+ test(m, "cabababc", 2);
+ }
+
+ @Test
+ public void test038() throws Exception {
+ Match m = run("ddcabababcdd", "ab", "bababcd", "cabababcy", "cabababcyy");
+ test(m, "ab", 3);
+ }
+
+ @Test
+ public void test039() throws Exception {
+ Match m = run("ddcabababcddf", "bababcd", "bababcddf", "cabababcy", "cabababcyy");
+ test(m, "bababcddf", 4);
+ }
+
+ @Test
+ public void test040() throws Exception {
+ Match m = run("ddcabababcddffxx", "ffxx", "bababcd", "bababcddffy", "cabababcy", "cabababcyy");
+ test(m, "bababcd", 4);
+ }
+
+ @Test
+ public void multi001() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.create("he", "she", "his", "hers");
+ List<Match> matches = m.find("ushers", 0);
+ testList(matches, "[[she, 1], [he, 2], [hers, 2]]");
+ }
+
+ @Test
+ public void multi002() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.create("he", "she", "his", "hers");
+ List<Match> matches = m.find("dddhisheddd", 0);
+ testList(matches, "[[his, 3], [she, 5], [he, 6]]");
+ }
+
+ @Test
+ public void multi003() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.create("he", "she", "his", "sh", "is");
+ List<Match> matches = m.find("dddhisheddd", 0);
+ testList(matches, "[[his, 3], [is, 4], [sh, 5], [she, 5], [he, 6]]");
+ }
+
+ @Test
+ public void multi004() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.create("ab", "about", "at", "ate", "be", "bed", "edge", "get");
+ List<Match> matches = m.find("abedgetab", 0);
+ testList(matches, "[[ab, 0], [be, 1], [bed, 1], [edge, 2], [get, 4], [ab, 7]]");
+ }
+
+ @Test
+ public void multi005() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.create("at", "art", "oars", "soar");
+ List<Match> matches = m.find("soars", 0);
+ testList(matches, "[[soar, 0], [oars, 1]]");
+ }
+
+ @Test
+ public void multi006() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.create("at", "art", "oars", "soar");
+ List<Match> matches = m.find("oart", 0);
+ testList(matches, "[[art, 1]]");
+ }
+
+ @Test
+ public void multi007() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.create("at", "art", "oars", "soar");
+ List<Match> matches = m.find("soarsoars", 0);
+ testList(matches, "[[soar, 0], [oars, 1], [soar, 4], [oars, 5]]");
+ }
+
+ @Test
+ public void multi008() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.create("at", "art", "oars", "soar");
+ List<Match> matches = m.find("sotat", 0);
+ testList(matches, "[[at, 3]]");
+ }
+
+ @Test
+ public void multi009() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.create("i", "in", "tin", "sting");
+ List<Match> matches = m.find("sting", 0);
+ testList(matches, "[[sting, 0], [tin, 1], [i, 2], [in, 2]]");
+ }
+
+ @Test
+ public void multi010() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.create("i", "in", "tin", "sting", "hastings");
+ List<Match> matches = m.find("hastings", 0);
+ testList(matches, "[[hastings, 0], [sting, 2], [tin, 3], [i, 4], [in, 4]]");
+ }
+
+ @Test
+ public void multi011() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.create("x", "xx", "xxx");
+ List<Match> matches = m.find("xxx", 0);
+ testList(matches, "[[x, 0], [xx, 0], [xxx, 0], [x, 1], [xx, 1], [x, 2]]");
+ }
+
+ @Test
+ public void multi012() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.create("she", "his", "hers");
+ List<Match> matches = m.find("dddhiheddd", 0);
+ assertEquals("Expected no match", 0, matches.size());
+ }
+
+ @Test
+ public void multi013() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.create("he", "she", "his", "hers");
+ List<Match> matches = m.find("dddhisheddd", 4);
+ testList(matches, "[[she, 5], [he, 6]]");
+ }
+
+ @Test
+ public void multi014() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.create("x", "xx", "xxx");
+ List<Match> matches = m.find("xxx", 2);
+ testList(matches, "[[x, 2]]");
+ }
+
+ @Test
+ public void noStrings001() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.builder().build();
+ assertNull("Expected no match", m.indexOf("dhihedd", 0));
+ List<Match> matches = m.find("dddhiheddd", 0);
+ assertEquals("Expected no match", 0, matches.size());
+ }
+
+ @Test
+ public void noStrings002() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.builder().add("").build();
+ assertNull("Expected no match", m.indexOf("dhihedd", 0));
+ List<Match> matches = m.find("dddhiheddd", 0);
+ assertEquals("Expected no match", 0, matches.size());
+ }
+
+ @Test
+ public void noStrings003() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.builder().add((String[]) null).build();
+ assertNull("Expected no match", m.indexOf("dhihedd", 0));
+ List<Match> matches = m.find("dddhiheddd", 0);
+ assertEquals("Expected no match", 0, matches.size());
+ }
+
+ @Test
+ public void fluent001() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.builder().add("he", "she", "his", "hers").build();
+ test(m.indexOf("ushers", 0), "she", 1);
+ }
+
+ @Test
+ public void fluent002() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.builder().add("he", "she").add("his", "hers").build();
+ testList(m.find("ushers", 0), "[[she, 1], [he, 2], [hers, 2]]");
+ }
+
+ @Test
+ public void fluent003() throws Exception {
+ MultiStringMatcher m = MultiStringMatcher.builder().add("he").add(null, "she", "").add("his", "hers").build();
+ testList(m.find("ushers", 0), "[[she, 1], [he, 2], [hers, 2]]");
+ }
+
+ @Test
+ public void addAfterBuild() throws Exception {
+ MultiStringMatcher.Builder b = MultiStringMatcher.builder().add("he", "she").add("his", "hers");
+ b.build();
+ thrown.expect(IllegalStateException.class);
+ b.add("us");
+ }
+
+ @Test
+ public void reuseBuilder() throws Exception {
+ MultiStringMatcher.Builder b = MultiStringMatcher.builder().add("he", "she").add("his", "hers");
+ b.build();
+ thrown.expect(IllegalStateException.class);
+ b.build();
+ }
+}
diff --git a/org.eclipse.text.tests/src/org/eclipse/text/tests/TextUtilitiesTest.java b/org.eclipse.text.tests/src/org/eclipse/text/tests/TextUtilitiesTest.java
index 7504167e805..3a0cc75cc47 100644
--- a/org.eclipse.text.tests/src/org/eclipse/text/tests/TextUtilitiesTest.java
+++ b/org.eclipse.text.tests/src/org/eclipse/text/tests/TextUtilitiesTest.java
@@ -13,7 +13,8 @@
*******************************************************************************/
package org.eclipse.text.tests;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
import java.util.ArrayList;
import java.util.Iterator;

Back to the top