diff options
author | Thomas Wolf | 2020-04-23 10:35:03 +0000 |
---|---|---|
committer | Thomas Wolf | 2020-04-23 13:48:21 +0000 |
commit | 62979ac7fa6172f2eb7230b2360c87edcb312c0a (patch) | |
tree | c5449821347c927d0afedf1e0c199e5dad5f76d9 | |
parent | 02d6d740ee76d7f1ded48da1d58240e263da5aa7 (diff) | |
download | rt.equinox.bundles-62979ac7fa6172f2eb7230b2360c87edcb312c0a.tar.gz rt.equinox.bundles-62979ac7fa6172f2eb7230b2360c87edcb312c0a.tar.xz rt.equinox.bundles-62979ac7fa6172f2eb7230b2360c87edcb312c0a.zip |
Bug 508611 - Public StringMatcher wildcard match API
StringMatcher exists in countless copies in Eclipse. Provide one
implementation as public API so that the other bundles can re-use
it instead of copying it.
Copy the version from JDT, add javadoc, clean up a little (mainly
braces and using ArrayList instead of Vector), correct existing
javadoc. Make the class final.
Change-Id: Ief50131e282a346e0ca2d00b3fa46a1969aa9d0d
Signed-off-by: Thomas Wolf <thomas.wolf@paranor.ch>
-rw-r--r-- | bundles/org.eclipse.equinox.common/META-INF/MANIFEST.MF | 1 | ||||
-rw-r--r-- | bundles/org.eclipse.equinox.common/src/org/eclipse/core/text/StringMatcher.java | 524 |
2 files changed, 525 insertions, 0 deletions
/*
 * Diff context preserved from the commit page (the source below is the content
 * of the new file added by this commit; it is declared in package
 * org.eclipse.core.text):
 *
 * diff --git a/bundles/org.eclipse.equinox.common/META-INF/MANIFEST.MF b/bundles/org.eclipse.equinox.common/META-INF/MANIFEST.MF
 * index d9d7f32df..be4c16589 100644
 * --- a/bundles/org.eclipse.equinox.common/META-INF/MANIFEST.MF
 * +++ b/bundles/org.eclipse.equinox.common/META-INF/MANIFEST.MF
 * @@ -15,6 +15,7 @@ Export-Package: org.eclipse.core.internal.boot;x-friends:="org.eclipse.core.reso
 *    org.eclipse.core.filesystem,
 *    org.eclipse.equinox.security",
 *   org.eclipse.core.runtime;common=split;version="3.5.0";mandatory:=common,
 * + org.eclipse.core.text;version="3.12.0",
 *   org.eclipse.equinox.events;version="1.0.0"
 *  Bundle-Vendor: %providerName
 *  Bundle-Activator: org.eclipse.core.internal.runtime.Activator
 *
 * diff --git a/bundles/org.eclipse.equinox.common/src/org/eclipse/core/text/StringMatcher.java b/bundles/org.eclipse.equinox.common/src/org/eclipse/core/text/StringMatcher.java
 * new file mode 100644
 * index 000000000..07d24b41d
 * --- /dev/null
 * +++ b/bundles/org.eclipse.equinox.common/src/org/eclipse/core/text/StringMatcher.java
 * @@ -0,0 +1,524 @@
 */
/*******************************************************************************
 * Copyright (c) 2000, 2020 IBM Corporation and others.
 *
 * This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License 2.0
 * which accompanies this distribution, and is available at
 * https://www.eclipse.org/legal/epl-2.0/
 *
 * SPDX-License-Identifier: EPL-2.0
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *******************************************************************************/

import java.util.ArrayList;
import java.util.List;

/**
 * A string pattern matcher. Supports '*' and '?' wildcards.
 * <p>
 * The pattern is parsed once in the constructor into literal segments separated
 * by '*' wildcards; instances are not modified afterwards.
 * </p>
 *
 * @since 3.12
 */
public final class StringMatcher {

    /**
     * Marker character stored inside a parsed segment in place of a '?'
     * wildcard. A pattern or text containing this character literally is
     * therefore treated as if it contained a single-character wildcard.
     */
    private static final char SINGLE_WILD_CARD = '\u0000';

    private final String fPattern;

    private final int fLength; // pattern length

    private final boolean fIgnoreWildCards;

    private final boolean fIgnoreCase;

    private boolean fHasLeadingStar;

    private boolean fHasTrailingStar;

    private String[] fSegments; // the given pattern is split into * separated segments

    /*
     * Sum of all segment lengths: the minimum number of characters a text region
     * must have to be able to match the pattern at all.
     */
    private int fBound = 0;

    /**
     * Start and end positions of a shortest match found by
     * {@link StringMatcher#find(String, int, int)}.
     * <p>
     * Note that {@link StringMatcher#find(String, int, int) find()} returns
     * {@code null} if there is no match, so the start and end indices are always
     * non-negative.
     * </p>
     */
    public static final class Position {

        private final int start; // inclusive
        private final int end; // exclusive

        /**
         * Creates a new {@link Position}.
         *
         * @param start index of first matched character
         * @param end   index after the last matched character
         */
        public Position(int start, int end) {
            this.start = start;
            this.end = end;
        }

        /**
         * Retrieves the index of the first matched character.
         *
         * @return the index of the first matched character
         */
        public int getStart() {
            return start;
        }

        /**
         * Retrieves the index after the last matched character.
         *
         * @return the index after the last matched character
         */
        public int getEnd() {
            return end;
        }

        @Override
        public int hashCode() {
            final int prime = 31;
            int result = 1;
            result = prime * result + end;
            result = prime * result + start;
            return result;
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            Position other = (Position) obj;
            return end == other.end && start == other.start;
        }

        @Override
        public String toString() {
            return "Position(" + start + ',' + end + ')'; //$NON-NLS-1$
        }
    }

    /**
     * StringMatcher constructor takes in a String object that is a simple pattern.
     * The pattern may contain '*' for 0 and many characters and '?' for exactly one
     * character.
     * <p>
     * Literal '*' and '?' characters must be escaped in the pattern e.g., "\*"
     * means literal "*", etc.
     * </p>
     * <p>
     * The escape character '\' is an escape only if followed by '*', '?', or '\'.
     * All other occurrences are taken literally.
     * </p>
     * <p>
     * If invoking the StringMatcher with string literals in Java, don't forget
     * escape characters are represented by "\\".
     * </p>
     *
     * @param pattern         the pattern to match text against, must not be
     *                        {@code null}
     * @param ignoreCase      if true, case is ignored
     * @param ignoreWildCards if true, wild cards and their escape sequences are
     *                        ignored (everything is taken literally).
     * @throws IllegalArgumentException if {@code pattern == null}
     */
    public StringMatcher(String pattern, boolean ignoreCase, boolean ignoreWildCards) {
        if (pattern == null) {
            throw new IllegalArgumentException("pattern must not be null"); //$NON-NLS-1$
        }
        fIgnoreCase = ignoreCase;
        fIgnoreWildCards = ignoreWildCards;
        fPattern = pattern;
        fLength = pattern.length();

        if (fIgnoreWildCards) {
            parseNoWildCards();
        } else {
            parseWildCards();
        }
    }

    /**
     * Finds the first occurrence of the pattern between {@code start} (inclusive)
     * and {@code end} (exclusive).
     * <p>
     * If wildcards are enabled: If the pattern contains only '*' wildcards a full
     * match is reported, otherwise leading and trailing '*' wildcards are ignored.
     * If the pattern contains interior '*' wildcards, the first <em>shortest</em>
     * match is returned.
     * </p>
     *
     * @param text  the String object to search in; must not be {@code null}
     * @param start the starting index of the search range, inclusive
     * @param end   the ending index of the search range, exclusive
     * @return a {@link Position} object for the match found, or {@code null} if
     *         there's no match or the text range to search is empty
     *         ({@code end <= start}). If the pattern is empty, the position will
     *         describe a null-match in the {@code text} at {@code start}
     *         ({@link Position#getStart() getStart()} == {@link Position#getEnd()
     *         getEnd()} == {@code start}).<br/>
     *         <b>Note:</b> for patterns like "*abc*" with leading and trailing
     *         stars, the position of "abc" is returned. For a pattern like "*??*"
     *         in text "abcdf", (0,2) is returned. Interior '*'s yield the
     *         <em>shortest</em> match: for pattern "a*b" and text "axbyb", (0,3) is
     *         returned, not (0,5).
     * @throws IllegalArgumentException if {@code text == null}
     */
    public Position find(String text, int start, int end) {
        if (text == null) {
            throw new IllegalArgumentException("text must not be null"); //$NON-NLS-1$
        }
        // Clamp the requested range to the text.
        int from = Math.max(start, 0);
        int to = Math.min(end, text.length());
        if (from >= to) {
            return null;
        }
        if (fLength == 0) {
            return new Position(from, from);
        }
        if (fIgnoreWildCards) {
            int index = textPosIn(text, from, to, fPattern);
            return index < 0 ? null : new Position(index, index + fLength);
        }

        int segCount = fSegments.length;
        if (segCount == 0) {
            // Pattern contains only '*'(s)
            return new Position(from, to);
        }

        // Match the segments one after the other, each at its first occurrence
        // after the previous one; this yields the shortest match.
        int matchStart = -1;
        int cursor = from;
        int matched = 0;
        while (matched < segCount && cursor < to) {
            String segment = fSegments[matched];
            int hit = regExpPosIn(text, cursor, to, segment);
            if (hit < 0) {
                return null;
            }
            if (matched == 0) {
                matchStart = hit;
            }
            cursor = hit + segment.length();
            matched++;
        }
        // Running out of text before all segments were placed means no match.
        return matched < segCount ? null : new Position(matchStart, cursor);
    }

    /**
     * Determines whether the given {@code text} matches the pattern.
     *
     * @param text String to match; must not be {@code null}
     * @return {@code true} if the whole {@code text} matches the pattern;
     *         {@code false} otherwise
     * @throws IllegalArgumentException if {@code text == null}
     */
    public boolean match(String text) {
        if (text == null) {
            throw new IllegalArgumentException("text must not be null"); //$NON-NLS-1$
        }
        return match(text, 0, text.length());
    }

    /**
     * Determines whether the given sub-string of {@code text} from {@code start}
     * (inclusive) to {@code end} (exclusive) matches the pattern.
     * <p>
     * When wildcards are enabled, {@code start} and {@code end} are clamped to the
     * bounds of {@code text}; a range that is empty or inverted after clamping
     * never matches a pattern that requires at least one character.
     * </p>
     *
     * @param text  String to match in; must not be {@code null}
     * @param start start index (inclusive) within {@code text} of the sub-string to
     *              match
     * @param end   end index (exclusive) within {@code text} of the sub-string to
     *              match
     * @return {@code true} if the given slice of {@code text} matches the pattern;
     *         {@code false} otherwise
     * @throws IllegalArgumentException if {@code text == null}
     */
    public boolean match(String text, int start, int end) {
        if (text == null) {
            throw new IllegalArgumentException("text must not be null"); //$NON-NLS-1$
        }
        if (start > end) {
            return false;
        }
        if (fIgnoreWildCards) {
            return (end - start == fLength) && fPattern.regionMatches(fIgnoreCase, 0, text, start, fLength);
        }

        int segCount = fSegments.length;
        if (segCount == 0 && (fHasLeadingStar || fHasTrailingStar)) {
            // Pattern contains only '*'(s)
            return true;
        }
        if (start == end) {
            return fLength == 0;
        }
        if (fLength == 0) {
            // Non-empty range against an empty pattern
            return false;
        }

        int from = Math.max(start, 0);
        int to = Math.min(end, text.length());
        // The region must be able to hold all segments. Comparing the region
        // length (not just 'to') also rejects ranges that are inverted after
        // clamping and keeps all subsequent reads inside [from, to).
        if (to - from < fBound) {
            return false;
        }

        int cursor = from;
        int index = 0;
        String segment = fSegments[index];

        // Process first segment: without a leading star it is anchored at 'from'
        if (!fHasLeadingStar) {
            if (!regExpRegionMatches(text, from, segment, 0, segment.length())) {
                return false;
            }
            index++;
            cursor += segment.length();
        }
        if (segCount == 1 && !fHasLeadingStar && !fHasTrailingStar) {
            // Only one segment to match, no wildcards specified
            return cursor == to;
        }

        // Process remaining segments: each at its first occurrence after the
        // previous one
        while (index < segCount) {
            segment = fSegments[index];
            int hit = segment.indexOf(SINGLE_WILD_CARD) < 0
                    ? textPosIn(text, cursor, to, segment)
                    : regExpPosIn(text, cursor, to, segment);
            if (hit < 0) {
                return false;
            }
            cursor = hit + segment.length();
            index++;
        }

        // Process final segment: without a trailing star it is anchored at 'to'
        if (!fHasTrailingStar && cursor != to) {
            int len = segment.length();
            return regExpRegionMatches(text, to - len, segment, 0, len);
        }
        return true;
    }

    /**
     * Sets up the matcher for literal matching: the whole pattern becomes the
     * single segment.
     */
    private void parseNoWildCards() {
        fSegments = new String[] { fPattern };
        fBound = fLength;
    }

    /**
     * Parses the given pattern into segments separated by wildcard '*' characters.
     * Escape sequences are resolved and '?' wildcards are replaced by
     * {@link #SINGLE_WILD_CARD} inside the segments.
     */
    private void parseWildCards() {
        // An escaped star starts with a backslash, so this test is unambiguous.
        fHasLeadingStar = fPattern.startsWith("*"); //$NON-NLS-1$

        List<String> segments = new ArrayList<>();
        StringBuilder buf = new StringBuilder();
        // Whether the most recently parsed token was an unescaped '*'. Deciding
        // this while parsing (instead of inspecting the character before the
        // last '*') handles patterns ending in an escaped backslash followed by
        // a '*' wildcard correctly.
        boolean lastWasStar = false;

        int pos = 0;
        while (pos < fLength) {
            char c = fPattern.charAt(pos++);
            switch (c) {
            case '\\':
                lastWasStar = false;
                if (pos >= fLength) {
                    // Lone backslash at the end is taken literally
                    buf.append(c);
                } else {
                    char next = fPattern.charAt(pos++);
                    if (next == '*' || next == '?' || next == '\\') {
                        // It _is_ an escape sequence
                        buf.append(next);
                    } else {
                        // Not an escape sequence, just insert literally
                        buf.append(c);
                        buf.append(next);
                    }
                }
                break;
            case '*':
                lastWasStar = true;
                if (buf.length() > 0) {
                    // Create a new segment
                    segments.add(buf.toString());
                    fBound += buf.length();
                    buf.setLength(0);
                }
                break;
            case '?':
                lastWasStar = false;
                // Append special character representing single match wildcard
                buf.append(SINGLE_WILD_CARD);
                break;
            default:
                lastWasStar = false;
                buf.append(c);
            }
        }

        // Add last buffer to segment list
        if (buf.length() > 0) {
            segments.add(buf.toString());
            fBound += buf.length();
        }

        fHasTrailingStar = lastWasStar;
        fSegments = segments.toArray(new String[0]);
    }

    /**
     * Determines the position of the first match of pattern {@code p}, which must
     * not contain wildcards, in the region {@code text[start..end-1]}.
     *
     * @param text  to find the pattern match in
     * @param start the starting index in the text for search, inclusive
     * @param end   the stopping point of search, exclusive
     * @param p     a plain text without any wildcards
     * @return the starting index in the text of the pattern, or -1 if not found
     */
    private int textPosIn(String text, int start, int end, String p) {
        int plen = p.length();
        int max = end - plen; // last index at which p still fits before 'end'

        if (!fIgnoreCase) {
            int i = text.indexOf(p, start);
            return (i < 0 || i > max) ? -1 : i;
        }

        for (int i = start; i <= max; ++i) {
            if (text.regionMatches(true, i, p, 0, plen)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Determines the position of the first match of pattern {@code p} in the region
     * {@code text[start..end-1]}.
     *
     * @param text  to find the pattern match in
     * @param start the starting index in the text for search, inclusive
     * @param end   the stopping point of search, exclusive
     * @param p     a simple regular expression that may contain single-character
     *              wildcards
     * @return the starting index in the text of the pattern, or -1 if not found
     */
    private int regExpPosIn(String text, int start, int end, String p) {
        int plen = p.length();
        int max = end - plen; // last index at which p still fits before 'end'

        for (int i = start; i <= max; ++i) {
            if (regExpRegionMatches(text, i, p, 0, plen)) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Determines whether the region {@code text[tStart..tStart+plen-1]} matches the
     * pattern {@code p}, which may contain single-character wildcards. The caller
     * must make sure that the region lies within {@code text}.
     *
     * @param text   String to match against the pattern
     * @param tStart Index in {@code text} to start matching at
     * @param p      String pattern to match against; may contain single-character
     *               wildcards
     * @param pStart Start of the pattern to match against
     * @param plen   Number of characters of {@code p} to match
     * @return {@code true} if the text matches; {@code false} otherwise
     */
    private boolean regExpRegionMatches(String text, int tStart, String p, int pStart, int plen) {
        while (plen-- > 0) {
            char tchar = text.charAt(tStart++);
            char pchar = p.charAt(pStart++);

            // Single-character wildcards match any character
            if (!fIgnoreWildCards && pchar == SINGLE_WILD_CARD) {
                continue;
            }
            if (pchar == tchar) {
                continue;
            }
            if (fIgnoreCase) {
                if (Character.toUpperCase(tchar) == Character.toUpperCase(pchar)) {
                    continue;
                }
                // comparing after converting to upper case doesn't handle all cases;
                // also compare after converting to lower case
                if (Character.toLowerCase(tchar) == Character.toLowerCase(pchar)) {
                    continue;
                }
            }
            return false;
        }
        return true;
    }
}