diff options
author | Joerg Kubitz | 2021-09-09 06:56:59 +0000 |
---|---|---|
committer | Lars Vogel | 2021-09-10 16:54:48 +0000 |
commit | e5b5814ba9b6f497470b731ddb919224046d6d1a (patch) | |
tree | 19b09281946cd722a089aad8792ff4de5e7a6bb6 | |
parent | 9f8c0aa760f5576ee96a78f0675c56e1a37a6d62 (diff) | |
download | eclipse.platform.text-e5b5814ba9b6f497470b731ddb919224046d6d1a.tar.gz eclipse.platform.text-e5b5814ba9b6f497470b731ddb919224046d6d1a.tar.xz eclipse.platform.text-e5b5814ba9b6f497470b731ddb919224046d6d1a.zip |
Bug 575893 - [performance] improve file search: non-regexp (tags: I20210911-1800, I20210910-1800)
Use Pattern.quote for non-wildcards in non-regexp search
instead of escaping each single character.
Change-Id: I061506448cdde5ab17ad1b9c551111472acf47eb
Signed-off-by: Joerg Kubitz <jkubitz-eclipse@gmx.de>
Reviewed-on: https://git.eclipse.org/r/c/platform/eclipse.platform.text/+/185193
Tested-by: Platform Bot <platform-bot@eclipse.org>
Reviewed-by: Lars Vogel <Lars.Vogel@vogella.com>
-rw-r--r-- | org.eclipse.search.tests/src/org/eclipse/search/tests/filesearch/FileSearchTests.java | 39 | ||||
-rw-r--r-- | org.eclipse.search/search/org/eclipse/search/internal/core/text/PatternConstructor.java | 57 |
2 files changed, 66 insertions, 30 deletions
diff --git a/org.eclipse.search.tests/src/org/eclipse/search/tests/filesearch/FileSearchTests.java b/org.eclipse.search.tests/src/org/eclipse/search/tests/filesearch/FileSearchTests.java index 51e3c093038..8439dc29140 100644 --- a/org.eclipse.search.tests/src/org/eclipse/search/tests/filesearch/FileSearchTests.java +++ b/org.eclipse.search.tests/src/org/eclipse/search/tests/filesearch/FileSearchTests.java @@ -362,6 +362,45 @@ public class FileSearchTests { } @Test + public void testWildcardQuotes() throws Exception { + assertWildcardReplace("H", "Hallo", "-allo"); + assertWildcardReplace("a", "Hallo", "H-llo"); + assertWildcardReplace("al", "Hallo", "H-lo"); + assertWildcardReplace("a*", "Hallo", "H-"); + assertWildcardReplace("a?", "Hallo", "H-lo"); + assertWildcardReplace("?", "Hallo", "-----"); + assertWildcardReplace("{", "Ha({o", "Ha(-o"); + assertWildcardReplace("(", "Ha({o", "Ha-{o"); + assertWildcardReplace("\\", "Ha\\\\o", "Ha--o"); + assertWildcardReplace("\\\\", "Ha\\\\o", "Ha--o"); + assertWildcardReplace("\\*", "Hall*", "Hall-"); + assertWildcardReplace("\\?", "Ha??o?", "Ha--o-"); + assertWildcardReplace("Du?und?ich", "Du und ich nicht", "- nicht"); + assertWildcardReplace("Du*ich", "Du und ich nicht", "-t"); + assertWildcardReplace("und*ich", "Du und ich nicht", "Du -t"); + assertWildcardReplace("*ich", "Du und ich nicht", "-t"); + + assertWildcardReplace("*", "Hallo", "--"); + // XXX i expect it to be "-" but ".*" indeed matches chars 0-5 and 5-5 + // it would need ".+" to not match the empty string at the end + } + + private void assertWildcardReplace(String pattern, String in, String expected) { + String regex= asRegEx(true, pattern); + try { + String replaced= in.replaceAll(regex, "-"); + assertEquals(expected, replaced); + } catch (Exception e) { + throw new RuntimeException("Error with pattern:" + pattern + " regex=" + regex, e); + } + } + + String asRegEx(boolean wildcards, String pattern) { + StringBuilder b= new StringBuilder(); + 
org.eclipse.search.internal.core.text.PatternConstructor.appendAsRegEx(wildcards, pattern, b); + return b.toString(); + } + @Test public void testDerivedFilesParallel() throws Exception { testDerivedFiles(new ParallelTestResultCollector()); } diff --git a/org.eclipse.search/search/org/eclipse/search/internal/core/text/PatternConstructor.java b/org.eclipse.search/search/org/eclipse/search/internal/core/text/PatternConstructor.java index b785782b8da..287fb18d688 100644 --- a/org.eclipse.search/search/org/eclipse/search/internal/core/text/PatternConstructor.java +++ b/org.eclipse.search/search/org/eclipse/search/internal/core/text/PatternConstructor.java @@ -66,7 +66,7 @@ public class PatternConstructor { if (isWholeWord && len > 0 && isWordChar(pattern.charAt(len - 1))) { buffer.append("\\b"); //$NON-NLS-1$ } - pattern= buffer.toString(); + pattern= buffer.toString(); } int regexOptions= Pattern.MULTILINE; @@ -183,6 +183,12 @@ public class PatternConstructor { public static StringBuilder appendAsRegEx(boolean isStringMatcher, String pattern, StringBuilder buffer) { + if (!isStringMatcher) { + buffer.append(Pattern.quote(pattern)); + return buffer; + } + // isStringMatcher: '*' and '?' wildcards and '\' as escape + StringBuilder quoted = new StringBuilder(); boolean isEscaped= false; for (int i = 0; i < pattern.length(); i++) { char c = pattern.charAt(i); @@ -190,59 +196,46 @@ public class PatternConstructor { // the backslash case '\\': // the backslash is escape char in string matcher - if (isStringMatcher && !isEscaped) { + if (!isEscaped) { isEscaped= true; } else { - buffer.append("\\\\"); //$NON-NLS-1$ - isEscaped= false; - } - break; - // characters that need to be escaped in the regex. 
- case '(': - case ')': - case '{': - case '}': - case '.': - case '[': - case ']': - case '$': - case '^': - case '+': - case '|': - if (isEscaped) { - buffer.append("\\\\"); //$NON-NLS-1$ + quoted.append(c); isEscaped= false; } - buffer.append('\\'); - buffer.append(c); break; case '?': - if (isStringMatcher && !isEscaped) { + if (!isEscaped) { + if (quoted.length() > 0) { // flush quote + buffer.append(Pattern.quote(quoted.toString())); + quoted = new StringBuilder(); + } buffer.append('.'); } else { - buffer.append('\\'); - buffer.append(c); + quoted.append(c); isEscaped= false; } break; case '*': - if (isStringMatcher && !isEscaped) { + if (!isEscaped) { + if (quoted.length() > 0) { // flush quote + buffer.append(Pattern.quote(quoted.toString())); + quoted = new StringBuilder(); + } buffer.append(".*"); //$NON-NLS-1$ } else { - buffer.append('\\'); - buffer.append(c); + quoted.append(c); isEscaped= false; } break; default: if (isEscaped) { - buffer.append("\\\\"); //$NON-NLS-1$ + quoted.append("\\"); //$NON-NLS-1$ isEscaped= false; } - buffer.append(c); + quoted.append(c); break; } } @@ -250,6 +243,10 @@ public class PatternConstructor { buffer.append("\\\\"); //$NON-NLS-1$ isEscaped= false; } + if (quoted.length() > 0) { // flush quote + buffer.append(Pattern.quote(quoted.toString())); + quoted = new StringBuilder(); + } return buffer; } |