Skip to main content
summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohn Arthorne2012-06-26 17:29:00 +0000
committerJohn Arthorne2012-07-24 17:19:08 +0000
commit42c842263c0e3058d987be5b3283c350f4c53d4f (patch)
tree2ed1e689e82d0e6651291be4add836b403fce07d
parent669fa15cc859773492bf292b70ec998e921293aa (diff)
downloadeclipse.platform.ua-42c842263c0e3058d987be5b3283c350f4c53d4f.tar.gz
eclipse.platform.ua-42c842263c0e3058d987be5b3283c350f4c53d4f.tar.xz
eclipse.platform.ua-42c842263c0e3058d987be5b3283c350f4c53d4f.zip
Bug 340563 - [Help][Search] Update Lucene 2.9.1 to the latest version
-rw-r--r--org.eclipse.help.base/META-INF/MANIFEST.MF3
-rw-r--r--org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java6
-rw-r--r--org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java7
-rw-r--r--org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java112
-rw-r--r--org.eclipse.ua.tests/META-INF/MANIFEST.MF5
5 files changed, 124 insertions, 9 deletions
diff --git a/org.eclipse.help.base/META-INF/MANIFEST.MF b/org.eclipse.help.base/META-INF/MANIFEST.MF
index f63cb9d9c..919783c3f 100644
--- a/org.eclipse.help.base/META-INF/MANIFEST.MF
+++ b/org.eclipse.help.base/META-INF/MANIFEST.MF
@@ -47,10 +47,11 @@ Import-Package: com.ibm.icu.text,
org.apache.lucene.analysis;version="3.5.0",
org.apache.lucene.analysis.standard;version="3.5.0",
org.apache.lucene.analysis.tokenattributes;version="3.5.0",
+ org.apache.lucene.collation;version="3.5.0",
org.apache.lucene.document;version="3.5.0",
org.apache.lucene.index;core=split;version="3.5.0",
org.apache.lucene.search;core=split;version="3.5.0",
- org.apache.lucene.store;core="split";version="3.5.0",
+ org.apache.lucene.store;core=split;version="3.5.0",
org.apache.lucene.util;version="3.5.0",
org.eclipse.equinox.http.jetty;resolution:=optional
Bundle-RequiredExecutionEnvironment: J2SE-1.5
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java
index 84c3bb1af..8c87edac4 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java
@@ -19,7 +19,8 @@ import org.eclipse.core.runtime.Platform;
import org.eclipse.help.internal.base.HelpBasePlugin;
/**
- * Lucene Analyzer. LowerCaseFilter->StandardTokenizer.
+ * Lucene Analyzer. LowerCaseFilter->WordTokenStream (uses word breaking in
+ * com.ibm.icu.text)
*/
public class DefaultAnalyzer extends Analyzer {
@@ -80,8 +81,7 @@ public class DefaultAnalyzer extends Analyzer {
* Reader.
*/
public final TokenStream tokenStream(String fieldName, Reader reader) {
- Version version = Version.LUCENE_CURRENT;
- return new LowerCaseFilter(version, new StandardTokenizer(version, reader));
+ return new LowerCaseFilter(new WordTokenStream(fieldName, reader, locale));
}
/**
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java
index 0df55e0e4..6a3809c25 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java
@@ -19,7 +19,7 @@ import java.util.Locale;
import java.util.StringTokenizer;
import org.apache.lucene.analysis.*;
-import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.eclipse.help.internal.base.*;
@@ -245,11 +245,10 @@ public class QueryBuilder {
Reader reader = new StringReader(text);
TokenStream tStream = analyzer.tokenStream(fieldName, reader);
- TermAttribute termAttribute = (TermAttribute) tStream.getAttribute(TermAttribute.class);
-
+ CharTermAttribute termAttribute = (CharTermAttribute) tStream.getAttribute(CharTermAttribute.class);
try {
while (tStream.incrementToken()) {
- String term = termAttribute.term();
+ String term = termAttribute.toString();
words.add(term);
}
reader.close();
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java
new file mode 100644
index 000000000..28b431b65
--- /dev/null
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java
@@ -0,0 +1,112 @@
+/*******************************************************************************
+ * Copyright (c) 2000, 2012 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ *******************************************************************************/
+package org.eclipse.help.internal.search;
+
+import com.ibm.icu.text.BreakIterator;
+import java.io.IOException;
+import java.io.Reader;
+import java.util.Locale;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+
+/**
+ * WordTokenStream obtains tokens containing words appropriate for use with
+ * Lucene search engine.
+ * <p>
+ * Text is pulled from the reader in chunks of about {@code BUF_LEN}
+ * characters, each extended up to the next ASCII white space character so
+ * that a word is not cut in half at a chunk edge. Every chunk is split with a
+ * locale-specific word {@link BreakIterator}; only segments containing at
+ * least one letter or digit are reported as tokens.
+ */
+public final class WordTokenStream extends Tokenizer {
+    private static final int BUF_LEN = 4096;
+    private final Reader reader;
+    private final BreakIterator boundary;
+    /** Chunk currently being segmented, or null when the next chunk must be read. */
+    private String chunk;
+    /** True once the reader has reported end of stream and has been closed. */
+    private boolean exhausted;
+
+    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+    /**
+     * Constructor
+     *
+     * @param fieldName
+     *            name of the field being analyzed; not used by this class
+     * @param reader
+     *            source of the text to tokenize
+     * @param locale
+     *            locale whose word-breaking rules are applied
+     */
+    public WordTokenStream(String fieldName, Reader reader, Locale locale) {
+        this.reader = reader;
+        boundary = BreakIterator.getWordInstance(locale);
+    }
+
+    /**
+     * Advances to the next word and stores it in the term attribute.
+     *
+     * @return true if a word was stored, false once the input is exhausted
+     * @throws IOException
+     *             if reading from or closing the underlying reader fails
+     * @see TokenStream#incrementToken()
+     */
+    @Override
+    public boolean incrementToken() throws IOException {
+        clearAttributes();
+
+        while (true) {
+            if (chunk == null && !readChunk()) {
+                return false;
+            }
+
+            // Continue from where the previous call stopped, so that no
+            // segment between two boundaries is skipped.
+            int start = boundary.current();
+            for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) {
+                // any letter or digit between boundaries means it is a word
+                if (containsLetterOrDigit(start, end)) {
+                    int length = end - start;
+                    char[] buffer = termAtt.resizeBuffer(length);
+                    chunk.getChars(start, end, buffer, 0);
+                    // clearAttributes() left the term empty, so the length
+                    // has to be published explicitly.
+                    termAtt.setLength(length);
+                    return true;
+                }
+            }
+
+            // This chunk has no more words; go on with the next one.
+            chunk = null;
+        }
+    }
+
+    /**
+     * Reads the next chunk of text and points the break iterator at its
+     * first boundary.
+     *
+     * @return true if a chunk was read, false if the reader has no more data
+     */
+    private boolean readChunk() throws IOException {
+        if (exhausted) {
+            return false;
+        }
+
+        char[] cbuf = new char[BUF_LEN];
+        int available;
+        while ((available = reader.read(cbuf)) <= 0) {
+            if (available < 0) {
+                endOfInput();
+                return false;
+            }
+        }
+
+        StringBuilder text = new StringBuilder(available + 80);
+        text.append(cbuf, 0, available);
+        // read more until white space (or EOF)
+        int c;
+        while ((c = reader.read()) >= 0) {
+            text.append((char) c);
+            if (c == ' ' || c == '\r' || c == '\n' || c == '\t') {
+                break;
+            }
+        }
+        if (c < 0) {
+            endOfInput();
+        }
+
+        chunk = text.toString();
+        boundary.setText(chunk);
+        boundary.first();
+        return true;
+    }
+
+    /**
+     * Records that the reader is drained and closes it, so that later calls
+     * never read from a closed reader.
+     */
+    private void endOfInput() throws IOException {
+        exhausted = true;
+        reader.close();
+    }
+
+    /**
+     * Tells whether the current chunk has a letter or digit in [from, to).
+     * Works on code points so that characters stored as surrogate pairs are
+     * classified correctly.
+     */
+    private boolean containsLetterOrDigit(int from, int to) {
+        int i = from;
+        while (i < to) {
+            int codePoint = chunk.codePointAt(i);
+            if (Character.isLetterOrDigit(codePoint)) {
+                return true;
+            }
+            i += Character.charCount(codePoint);
+        }
+        return false;
+    }
+
+    /**
+     * Delegates to the superclass and clears the attributes. The reader is
+     * not rewound and the position inside the current chunk is kept.
+     */
+    @Override
+    public void reset() throws IOException {
+        super.reset();
+        clearAttributes();
+    }
+
+    /**
+     * Closes the reader handed to the constructor, if there is one.
+     */
+    @Override
+    public void close() throws IOException {
+        // Unlikely to be called as this stream is reused
+        if (this.reader != null) {
+            this.reader.close();
+        }
+    }
+}
diff --git a/org.eclipse.ua.tests/META-INF/MANIFEST.MF b/org.eclipse.ua.tests/META-INF/MANIFEST.MF
index bfbd6ea42..feadd44e0 100644
--- a/org.eclipse.ua.tests/META-INF/MANIFEST.MF
+++ b/org.eclipse.ua.tests/META-INF/MANIFEST.MF
@@ -24,7 +24,10 @@ Bundle-ActivationPolicy: lazy
Bundle-Vendor: Eclipse.org
Bundle-ClassPath: ua-tests.jar
Import-Package: javax.servlet;version="2.4.0",
- javax.servlet.http;version="2.4.0"
+ javax.servlet.http;version="2.4.0",
+ org.apache.lucene.index;core="split";version="3.5.0",
+ org.apache.lucene.search;core="split";version="3.5.0",
+ org.apache.lucene.store;core="split";version="3.5.0"
Bundle-RequiredExecutionEnvironment: J2SE-1.5
Export-Package: org.eclipse.ua.tests,
org.eclipse.ua.tests.browser,

Back to the top