blob: 86605af85da3e5ee2d9d97626802adab85efd784 (
plain) (
blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
/*******************************************************************************
* Copyright (c) 2000, 2011 IBM Corporation and others.
*
* This program and the accompanying materials
* are made available under the terms of the Eclipse Public License 2.0
* which accompanies this distribution, and is available at
* https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*
* Contributors:
* IBM Corporation - initial API and implementation
*******************************************************************************/
package org.eclipse.compare.contentmergeviewer;
import org.eclipse.compare.rangedifferencer.IRangeComparator;
import org.eclipse.core.runtime.Assert;
/**
 * A token-based implementation of the <code>ITokenComparator</code> interface.
 * The input string is cut into runs of consecutive characters that share one
 * category (white space, digits, letters, quotes, or anything else); every
 * such run is one token.
 * <p>
 * A <code>TokenComparator</code> is used as the input for the <code>RangeDifferencer</code>
 * engine to perform a token oriented compare on strings.
 * </p>
 * <p>
 * This class may be instantiated by clients but is not intended to be subclassed.
 * </p>
 * @since 3.4
 */
public class TokenComparator implements ITokenComparator {
	/** The text that was tokenized. */
	private String fText;
	/** Number of tokens found in {@link #fText}. */
	private int fCount;
	/** Start offset of each token; only the first {@link #fCount} entries are used. */
	private int[] fStarts;
	/** Length of each token; only the first {@link #fCount} entries are used. */
	private int[] fLengths;

	/**
	 * Creates a <code>TokenComparator</code> for the given string.
	 *
	 * @param text the string that is split into tokens; must not be <code>null</code>
	 */
	public TokenComparator(String text) {
		Assert.isNotNull(text);
		fText= text;

		final int textLength= text.length();
		// Worst case: every character is its own token.
		fStarts= new int[textLength];
		fLengths= new int[textLength];
		fCount= 0;

		char previous= 0; // 0 never matches a real category, so the first character opens a token
		for (int pos= 0; pos < textLength; pos++) {
			char current= categoryOf(text.charAt(pos));
			if (current != previous) {
				// category changed: open a new token at this position
				fStarts[fCount]= pos;
				fCount++;
				previous= current;
			}
			// the character belongs to the most recently opened token
			fLengths[fCount - 1]++;
		}
	}

	/**
	 * Maps a character to the marker of its token category.
	 *
	 * @param c the character to classify
	 * @return <code>' '</code> for white space, <code>'0'</code> for digits,
	 *         <code>'a'</code> for letters, <code>'"'</code> for single or double
	 *         quotes (see bug 198671), and <code>'?'</code> for everything else
	 */
	private static char categoryOf(char c) {
		if (Character.isWhitespace(c)) {
			return ' ';
		}
		if (Character.isDigit(c)) {
			return '0';
		}
		if (Character.isLetter(c)) {
			return 'a';
		}
		if (c == '\"' || c == '\'') {
			return '\"';
		}
		return '?';
	}

	/**
	 * Returns the number of tokens the text was split into.
	 *
	 * @return the token count
	 */
	@Override
	public int getRangeCount() {
		return fCount;
	}

	/**
	 * Returns the start offset of the token with the given index.
	 *
	 * @param index the token index
	 * @return the start offset of the token, or the length of the text if
	 *         <code>index</code> is not smaller than the token count
	 */
	@Override
	public int getTokenStart(int index) {
		return index < fCount ? fStarts[index] : fText.length();
	}

	/**
	 * Returns the length of the token with the given index.
	 *
	 * @param index the token index
	 * @return the number of characters in the token, or <code>0</code> if
	 *         <code>index</code> is not smaller than the token count
	 */
	@Override
	public int getTokenLength(int index) {
		return index < fCount ? fLengths[index] : 0;
	}

	/**
	 * Compares a token of this comparator with a token of another comparator.
	 * Two tokens are equal when they have the same length and the same
	 * characters (case-sensitive).
	 *
	 * @param thisIndex the index of the token in this comparator
	 * @param other the comparator holding the other token
	 * @param otherIndex the index of the token in <code>other</code>
	 * @return <code>true</code> if both tokens are equal; <code>false</code> if
	 *         they differ, or if <code>other</code> is <code>null</code> or is not
	 *         of exactly the same class as this comparator
	 */
	@Override
	public boolean rangesEqual(int thisIndex, IRangeComparator other, int otherIndex) {
		if (other == null || getClass() != other.getClass()) {
			return false;
		}
		TokenComparator that= (TokenComparator) other;
		int ownLength= getTokenLength(thisIndex);
		int thatLength= that.getTokenLength(otherIndex);
		if (ownLength != thatLength) {
			return false;
		}
		return fText.regionMatches(false, getTokenStart(thisIndex), that.fText, that.getTokenStart(otherIndex), ownLength);
	}

	/**
	 * Decides whether a range comparison should be skipped, based on fixed thresholds.
	 * The meaning of <code>length</code> and <code>max</code> is defined by the
	 * <code>IRangeComparator</code> contract.
	 *
	 * @param length the first number the decision is based on
	 * @param max the second number the decision is based on
	 * @param other the other comparator taking part in the comparison
	 * @return <code>false</code> if either comparator has fewer than 50 ranges or
	 *         if <code>max</code> or <code>length</code> is below 100; otherwise
	 *         <code>true</code> if <code>max</code> exceeds 800 or
	 *         <code>length</code> is at least a quarter of <code>max</code>
	 */
	@Override
	public boolean skipRangeComparison(int length, int max, IRangeComparator other) {
		// Evaluated first and short-circuited: other is only consulted when this side has >= 50 ranges.
		boolean tooFewRanges= getRangeCount() < 50 || other.getRangeCount() < 50;
		if (tooFewRanges || max < 100 || length < 100) {
			return false;
		}
		return max > 800 || length >= max / 4;
	}
}
|