Skip to main content
aboutsummaryrefslogtreecommitdiffstats
blob: 0c77825f1ff0d4f313c427ae9696b14966c7dec9 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
/*******************************************************************************
 * Copyright (c) 2011, 2016 Tomasz Wesolowski and others
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *    Tomasz Wesolowski - initial API and implementation
 *    Jens Elmenthaler - further tweaking
 *******************************************************************************/
package org.eclipse.cdt.core.parser.util;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A matcher for camel case matching supporting both the camel case as well as
 * the underscore notation.
 * <p>
 * The pattern is split into segments. A segment consists of optional leading
 * characters that are neither letters nor digits (matched literally), followed
 * by either a run of digits or a letter with any number of trailing lower case
 * letters. Between two segments the name may contain arbitrary characters.
 * A text segment that is neither the first segment nor introduced by explicit
 * separator characters has to start in the name either with the upper case
 * letter, or with an underscore followed by the letter in either case.
 * 
 * @noextend This class is not intended to be subclassed by clients.
 * @since 5.3
 */
public class SegmentMatcher {

	/** The leading part of the pattern up to the end of its first segment. */
	private final char[] prefixForBinarySearch;

	/** The string that any prefix match has to start with. */
	private final char[] prefixForMatching;

	/** The regular expression for a segment match; {@code null} for an empty pattern. */
	private final Pattern regexp;
	
	/** The minimum length any name must have in order to match. */
	private final int minNameLength;

	/** True if the pattern is empty or consists of exactly one segment. */
	private final boolean singleSegment;
	
	/**
	 * Translates the given pattern into a regular expression used for segment
	 * matching and derives the prefixes used for prefix matching and binary
	 * searches.
	 * 
	 * @param pattern
	 *            The camel case or underscore pattern. A {@code null} or empty
	 *            pattern yields a matcher without any regular expression and
	 *            with empty prefixes.
	 */
	public SegmentMatcher(char[] pattern) {

		if (pattern == null || pattern.length == 0) {
			prefixForMatching = CharArrayUtils.EMPTY;
			prefixForBinarySearch = CharArrayUtils.EMPTY;
			regexp = null;
			minNameLength = 0;
			singleSegment = true;
		} else {

			// The expression is anchored at the start of the name.
			StringBuilder regexpBuffer = new StringBuilder("^"); //$NON-NLS-1$
			int i = 0;
			int lengthOfFirstSegment = 0;
			char currentChar;
			int segmentCount = 0;
			
			// Translate each segment
			while (i < pattern.length) {

				// Set as soon as the segment starts with at least one
				// character that is neither a letter nor a digit.
				boolean separatorSpecified = false;

				// Handle prefix, i.e. anything before the first letter or digit
				for (; i < pattern.length; ++i) {
					currentChar = pattern[i];
					if (Character.isLetterOrDigit(currentChar)) {
						break;
					} else {
						// Quote those characters.
						regexpBuffer.append(Pattern.quote(String.valueOf(currentChar)));
						separatorSpecified = true;
					}
				}

				if (i < pattern.length) {
					// The character here is always a letter or digit.
					currentChar = pattern[i];

					if (Character.isDigit(currentChar)) {

						// Handle number segment: all consecutive digits are
						// matched literally.
						regexpBuffer.append(currentChar);
						for (++i; i < pattern.length; ++i) {
							currentChar = pattern[i];
							if (Character.isDigit(currentChar)) {
								regexpBuffer.append(currentChar);
							} else {
								break;
							}
						}

					} else {

						// Handle text segment
						char lower = Character.toLowerCase(currentChar);
						char upper = Character.toUpperCase(currentChar);

						if ((segmentCount == 0) || separatorSpecified) {
							// First segment, or the pattern itself names the
							// separator: the letter is matched exactly as typed.
							regexpBuffer.append(currentChar);
						} else {
							// No separator in the pattern: the name has to start
							// this segment either with an underscore followed by
							// the letter in either case, or with the upper case
							// letter.
							regexpBuffer.append("(_["); //$NON-NLS-1$
							regexpBuffer.append(lower);
							regexpBuffer.append(upper);
							regexpBuffer.append("]|"); //$NON-NLS-1$
							regexpBuffer.append(upper);
							regexpBuffer.append(')');
						}

						// Remaining letters of the segment: lower case letters
						// of the pattern match either case in the name. An upper
						// case letter or a non-letter starts the next segment.
						for (++i; i < pattern.length; ++i) {

							currentChar = pattern[i];
							if (Character.isLetter(currentChar)) {
								if (Character.isUpperCase(currentChar)) {
									break;
								} else {
									lower = currentChar;
									upper = Character.toUpperCase(currentChar);
									regexpBuffer.append('[');
									regexpBuffer.append(lower);
									regexpBuffer.append(upper);
									regexpBuffer.append(']');
								}
							} else {
								break;
							}
						}
					}
				}
				// Anything may follow a segment.
				regexpBuffer.append(".*"); //$NON-NLS-1$
				
				if (segmentCount == 0) {
					// i now points just behind the first segment.
					lengthOfFirstSegment = i;
				}
				
				++segmentCount;
			}
			
			regexp = Pattern.compile(regexpBuffer.toString());
			singleSegment = (segmentCount == 1);
			prefixForMatching = pattern;
			
			// The first segment is also the binary search prefix
			prefixForBinarySearch = CharArrayUtils.extract(pattern, 0, lengthOfFirstSegment);
			
			// Every pattern character consumes at least one character of the name.
			minNameLength = pattern.length;
		}
	}

	/**
	 * Matches the given name by prefix and segment matching.
	 * 
	 * @param name
	 *            The name to test; {@code null} never matches.
	 * @return true if the associated pattern is a prefix-based or segment-based abbreviation of name.
	 */
	public boolean match(char[] name) {
		if (matchPrefix(name)) {
			return true;
		}

		// If there is only a single segment given and prefix match failed,
		// the segment match cannot pass either. So skip it.
		if (singleSegment) {
			return false;
		}

		return matchSegments(name);
	}

	/**
	 * Matches the given name by prefix matching.
	 * 
	 * @param name
	 *            The name to test; {@code null} never matches.
	 * @return true if the associated pattern is a prefix-based abbreviation of name.
	 */
	public boolean matchPrefix(char[] name) {
		// Behave like matchSegments(char[]) for null instead of handing
		// null on to CharArrayUtils.
		if (name == null) {
			return false;
		}
		return (CharArrayUtils.equals(name, 0, prefixForMatching.length, prefixForMatching, true));
	}

	/**
	 * Matches the given name by segment matching.
	 * 
	 * @param name
	 *            The name to test; {@code null} never matches.
	 * @return true if the associated pattern is a segment-based abbreviation of name.
	 */
	public boolean matchSegments(char[] name) {

		if (name == null) {
			return false;
		}
		
		// Cheap rejection before running the regular expression.
		if (name.length < minNameLength) {
			return false;
		}
						
		// An empty pattern matches every name.
		if (regexp == null) {
			return true;
		}
		
		Matcher matcher = regexp.matcher(String.valueOf(name));
		
		// The expression starts with '^', so find() can only succeed at the
		// beginning of the name.
		return matcher.find();
	}
	
	/**
	 * Matches pattern to name by prefix and segment matching. If you have to match
	 * against the same pattern repeatedly, create a {@link SegmentMatcher} instead
	 * and re-use it all the time, because this is much faster.
	 * 
	 * @param pattern
	 *            The camel case or underscore pattern.
	 * @param name
	 *            The name to test.
	 * @return true if pattern is a prefix-based or segment-based abbreviation of name
	 */
	public static boolean match(char[] pattern, char[] name) {
		return (new SegmentMatcher(pattern)).match(name);
	}

	/**
	 * The pattern used by this matcher is not suitable for binary searches
	 * (e.g. within the index).
	 * However, a string can be derived from it that can be used in the
	 * context of binary searches.
	 * In the compare method used by your binary search, return 0 for any string
	 * that starts with the returned string.
	 * 
	 * @return Such a string: the pattern up to the end of its first segment.
	 */
	public char[] getPrefixForBinarySearch() {
		return prefixForBinarySearch;
	}
	
	/**
	 * @return If false, calling {@link #match(char[])} can be skipped if a 
	 *         name survived a binary search using the prefix returned by
	 *         {@link #getPrefixForBinarySearch()} as key.
	 */
	public boolean matchRequiredAfterBinarySearch() {
		return !singleSegment;
	}
}

Back to the top