diff options
author | Julian Honnen | 2020-07-16 11:02:30 +0000 |
---|---|---|
committer | Julian Honnen | 2020-07-16 11:02:30 +0000 |
commit | 55cd7fdcd2486170b7f6a341015cae5df037d88f (patch) | |
tree | 5cffa165740add5d5592c7ae9e1f1a61b6d6a0d3 | |
parent | c1c989f6c7f6d8d058bbbf089b250ca1c3d327f1 (diff) | |
download | eclipse.jdt.core-55cd7fdcd2486170b7f6a341015cae5df037d88f.tar.gz eclipse.jdt.core-55cd7fdcd2486170b7f6a341015cae5df037d88f.tar.xz eclipse.jdt.core-55cd7fdcd2486170b7f6a341015cae5df037d88f.zip |
Bug 565267 - [subword] improved boundaries for mixed caps and camel case [I20200718-1800] [I20200717-1800] [I20200716-1800]
Fixed detection of word boundary after capital name prefix.
Examples:
* IImportWizard matched by "import": I[Import]Wizard
* HTMLTable matched by "table": HTML[Table]
The state-machine-based detection turned out not to be a good fit.
Instead, detect boundaries by comparing the cases in every tuple of
(previous, current, next) chars.
Change-Id: If87714632f1103b1fbb96171c1c29483d7417d94
Signed-off-by: Julian Honnen <julian.honnen@vector.com>
-rw-r--r-- | org.eclipse.jdt.core.tests.model/src/org/eclipse/jdt/core/tests/model/MatchingRegionsTest.java | 25 | ||||
-rw-r--r-- | org.eclipse.jdt.core/compiler/org/eclipse/jdt/core/compiler/SubwordMatcher.java | 84 |
2 files changed, 56 insertions, 53 deletions
diff --git a/org.eclipse.jdt.core.tests.model/src/org/eclipse/jdt/core/tests/model/MatchingRegionsTest.java b/org.eclipse.jdt.core.tests.model/src/org/eclipse/jdt/core/tests/model/MatchingRegionsTest.java index c890b46e7a..aa2aa77097 100644 --- a/org.eclipse.jdt.core.tests.model/src/org/eclipse/jdt/core/tests/model/MatchingRegionsTest.java +++ b/org.eclipse.jdt.core.tests.model/src/org/eclipse/jdt/core/tests/model/MatchingRegionsTest.java @@ -1609,6 +1609,11 @@ public void testSubword_caps_boundaries2() { int[] regions = SearchPattern.getMatchingRegions("ini", name, SearchPattern.R_SUBWORD_MATCH); assertEquals("Unexpected matching regions", null, printRegions(name, regions)); } +public void testSubword_caps_boundaries3() { + String name = "CASE_INSENSITIVE_ORDER"; + int[] regions = SearchPattern.getMatchingRegions("sensitive", name, SearchPattern.R_SUBWORD_MATCH); + assertEquals("Unexpected matching regions", null, printRegions(name, regions)); +} public void testSubword_caps_backtracking() { String name = "LIST_LISTENER"; int[] regions = SearchPattern.getMatchingRegions("listener", name, SearchPattern.R_SUBWORD_MATCH); @@ -1619,4 +1624,24 @@ public void testSubword_snakeCase() { int[] regions = SearchPattern.getMatchingRegions("addlistener", name, SearchPattern.R_SUBWORD_MATCH); assertEquals("Unexpected matching regions", "[add]_list_[listener]", printRegions(name, regions)); } +public void testSubword_mixedCamelCase1() { + String name = "IImportWizard"; + int[] regions = SearchPattern.getMatchingRegions("import", name, SearchPattern.R_SUBWORD_MATCH); + assertEquals("Unexpected matching regions", "I[Import]Wizard", printRegions(name, regions)); +} +public void testSubword_mixedCamelCase2() { + String name = "HTMLTable"; + int[] regions = SearchPattern.getMatchingRegions("table", name, SearchPattern.R_SUBWORD_MATCH); + assertEquals("Unexpected matching regions", "HTML[Table]", printRegions(name, regions)); +} +public void testSubword_mixedCamelCase3() { + String 
name = "CustomHTMLTable"; + int[] regions = SearchPattern.getMatchingRegions("table", name, SearchPattern.R_SUBWORD_MATCH); + assertEquals("Unexpected matching regions", "CustomHTML[Table]", printRegions(name, regions)); +} +public void testSubword_mixedCamelCase4() { + String name = "ImportHTML"; + int[] regions = SearchPattern.getMatchingRegions("html", name, SearchPattern.R_SUBWORD_MATCH); + assertEquals("Unexpected matching regions", "Import[HTML]", printRegions(name, regions)); +} } diff --git a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/core/compiler/SubwordMatcher.java b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/core/compiler/SubwordMatcher.java index 806091f882..9886aefd5c 100644 --- a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/core/compiler/SubwordMatcher.java +++ b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/core/compiler/SubwordMatcher.java @@ -7,7 +7,7 @@ * https://www.eclipse.org/legal/epl-2.0/ * * SPDX-License-Identifier: EPL-2.0 - * + * * Contributors: * Julian Honnen - initial API and implementation *******************************************************************************/ @@ -29,16 +29,42 @@ class SubwordMatcher { this.name = name.toCharArray(); this.wordBoundaries = new BitSet(name.length()); - BoundaryState state = BoundaryState.SEPARATOR; for (int i = 0; i < this.name.length; i++) { - char c = this.name[i]; - if (state.isWordBoundary(c)) { + if (isWordBoundary(caseAt(i - 1), caseAt(i), caseAt(i + 1))) { this.wordBoundaries.set(i); } - state = state.next(c); } } + private Case caseAt(int index) { + if (index < 0 || index >= this.name.length) + return Case.SEPARATOR; + + char c = this.name[index]; + if (c == '_') + return Case.SEPARATOR; + if (ScannerHelper.isUpperCase(c)) + return Case.UPPER; + return Case.LOWER; + } + + private static boolean isWordBoundary(Case p, Case c, Case n) { + if (p == c && c == n) + return false; // a boundary needs some kind of gradient + + if (p == Case.SEPARATOR) + return true; // boundary after 
every separator + + // the remaining cases are boundaries for capitalization changes: + // lowerUpper, UPPERLower, lowerUPPER + // ^ ^ ^ + return (c == Case.UPPER) && (p == Case.LOWER || n == Case.LOWER); + } + + private enum Case { + SEPARATOR, LOWER, UPPER + } + public int[] getMatchingRegions(String pattern) { int segmentStart = 0; int[] segments = EMPTY_REGIONS; @@ -131,52 +157,4 @@ class SubwordMatcher { private boolean isWordBoundary(int iName) { return this.wordBoundaries.get(iName); } - - private enum BoundaryState { - SEPARATOR() { - @Override - public BoundaryState next(char c) { - if (c == '_') - return SEPARATOR; - - return ScannerHelper.isUpperCase(c) ? CAPS_WORD : WORD; - } - @Override - public boolean isWordBoundary(char c) { - return true; - } - }, - WORD() { - @Override - public BoundaryState next(char c) { - if (c == '_') - return SEPARATOR; - - return WORD; - } - - @Override - public boolean isWordBoundary(char c) { - return ScannerHelper.isUpperCase(c); - } - }, - CAPS_WORD() { - @Override - public BoundaryState next(char c) { - if (c == '_') - return SEPARATOR; - - return ScannerHelper.isUpperCase(c) ? CAPS_WORD : WORD; - } - - @Override - public boolean isWordBoundary(char c) { - return next(c) == SEPARATOR; - } - }; - - public abstract boolean isWordBoundary(char c); - - public abstract BoundaryState next(char c); - } }
\ No newline at end of file |