diff options
author | John Arthorne | 2012-06-26 17:29:00 +0000 |
---|---|---|
committer | John Arthorne | 2012-07-24 17:19:08 +0000 |
commit | 42c842263c0e3058d987be5b3283c350f4c53d4f (patch) | |
tree | 2ed1e689e82d0e6651291be4add836b403fce07d | |
parent | 669fa15cc859773492bf292b70ec998e921293aa (diff) | |
download | eclipse.platform.ua-42c842263c0e3058d987be5b3283c350f4c53d4f.tar.gz eclipse.platform.ua-42c842263c0e3058d987be5b3283c350f4c53d4f.tar.xz eclipse.platform.ua-42c842263c0e3058d987be5b3283c350f4c53d4f.zip |
Bug 340563 - [Help][Search] Update Lucene 2.9.1 to the latest version
5 files changed, 124 insertions, 9 deletions
diff --git a/org.eclipse.help.base/META-INF/MANIFEST.MF b/org.eclipse.help.base/META-INF/MANIFEST.MF index f63cb9d9c..919783c3f 100644 --- a/org.eclipse.help.base/META-INF/MANIFEST.MF +++ b/org.eclipse.help.base/META-INF/MANIFEST.MF @@ -47,10 +47,11 @@ Import-Package: com.ibm.icu.text, org.apache.lucene.analysis;version="3.5.0", org.apache.lucene.analysis.standard;version="3.5.0", org.apache.lucene.analysis.tokenattributes;version="3.5.0", + org.apache.lucene.collation;version="3.5.0", org.apache.lucene.document;version="3.5.0", org.apache.lucene.index;core=split;version="3.5.0", org.apache.lucene.search;core=split;version="3.5.0", - org.apache.lucene.store;core="split";version="3.5.0", + org.apache.lucene.store;core=split;version="3.5.0", org.apache.lucene.util;version="3.5.0", org.eclipse.equinox.http.jetty;resolution:=optional Bundle-RequiredExecutionEnvironment: J2SE-1.5 diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java index 84c3bb1af..8c87edac4 100644 --- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java +++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java @@ -19,7 +19,8 @@ import org.eclipse.core.runtime.Platform; import org.eclipse.help.internal.base.HelpBasePlugin; /** - * Lucene Analyzer. LowerCaseFilter->StandardTokenizer. + * Lucene Analyzer. LowerCaseTokenizer->WordTokenStream (uses word breaking in + * java.text) */ public class DefaultAnalyzer extends Analyzer { @@ -80,8 +81,7 @@ public class DefaultAnalyzer extends Analyzer { * Reader. */ public final TokenStream tokenStream(String fieldName, Reader reader) { - Version version = Version.LUCENE_CURRENT; - return new LowerCaseFilter(version, new StandardTokenizer(version, reader)); + return new LowerCaseFilter(new WordTokenStream(fieldName, reader, locale)); } /** diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java index 0df55e0e4..6a3809c25 100644 --- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java +++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java @@ -19,7 +19,7 @@ import java.util.Locale; import java.util.StringTokenizer; import org.apache.lucene.analysis.*; -import org.apache.lucene.analysis.tokenattributes.TermAttribute; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.apache.lucene.index.*; import org.apache.lucene.search.*; import org.eclipse.help.internal.base.*; @@ -245,11 +245,10 @@ public class QueryBuilder { Reader reader = new StringReader(text); TokenStream tStream = analyzer.tokenStream(fieldName, reader); - TermAttribute termAttribute = (TermAttribute) tStream.getAttribute(TermAttribute.class); - + CharTermAttribute termAttribute = (CharTermAttribute) tStream.getAttribute(CharTermAttribute.class); try { while (tStream.incrementToken()) { - String term = termAttribute.term(); + String term = termAttribute.toString(); words.add(term); } reader.close(); diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java new file mode 100644 index 000000000..28b431b65 --- /dev/null +++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java @@ -0,0 +1,112 @@ +/******************************************************************************* + * Copyright (c) 2000, 2012 IBM Corporation and others. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * IBM Corporation - initial API and implementation + *******************************************************************************/ +package org.eclipse.help.internal.search; + +import com.ibm.icu.text.BreakIterator; +import java.io.IOException; +import java.io.Reader; +import java.util.Locale; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; + +/** + * WordTokenStream obtains tokens containing words appropriate for use with + * Lucene search engine. + */ +public final class WordTokenStream extends Tokenizer { + private static final int BUF_LEN = 4096; + private final Reader reader; + private final BreakIterator boundary; + private StringBuffer strbuf; + + private int start = 0; + private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class); + + /** + * Constructor + */ + public WordTokenStream(String fieldName, Reader reader, Locale locale) { + this.reader = reader; + boundary = BreakIterator.getWordInstance(locale); + + } + /** + * @see TokenStream#incrementToken() + */ + @Override + public boolean incrementToken() throws IOException { + clearAttributes(); + int length = 0; + char[] buffer = termAtt.buffer(); + + int end; + if(strbuf == null) { + int available; + char[] cbuf = new char[BUF_LEN]; + while ((available = reader.read(cbuf)) <= 0) { + if (available < 0) { + reader.close(); + return false; + } + } + strbuf = new StringBuffer(available + 80); + strbuf.append(cbuf, 0, available); + // read more until white space (or EOF) + int c; + while (0 <= (c = reader.read())) { + strbuf.append((char) c); + if (c == ' ' || c == '\r' || c == '\n' || c == '\t') { + break; + } + } + + if (c < 0) { + reader.close(); + } + + boundary.setText(strbuf.toString()); + start = boundary.first(); + } + else { + start = boundary.next(); + } + + for (end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) { + // determine if it is a word + // any letter or digit between boundaries means it is a word + for (int i = start; i < end; i++) { + if (Character.isLetterOrDigit(strbuf.charAt(i))) { + // it is a word + length = end - start; + if (length >= buffer.length-1) + buffer = termAtt.resizeBuffer(2+length); + strbuf.getChars(start, end, buffer, 0); + return true; + } + } + } + + return false; + } + + public void reset() throws IOException { + super.reset(); + clearAttributes(); + } + + public void close() throws IOException { + /// Unlikely to be called as this is a reused + if (this.reader != null) { + this.reader.close(); + } + } +} diff --git a/org.eclipse.ua.tests/META-INF/MANIFEST.MF b/org.eclipse.ua.tests/META-INF/MANIFEST.MF index bfbd6ea42..feadd44e0 100644 --- a/org.eclipse.ua.tests/META-INF/MANIFEST.MF +++ b/org.eclipse.ua.tests/META-INF/MANIFEST.MF @@ -24,7 +24,10 @@ Bundle-ActivationPolicy: lazy Bundle-Vendor: Eclipse.org Bundle-ClassPath: ua-tests.jar Import-Package: javax.servlet;version="2.4.0", - javax.servlet.http;version="2.4.0" + javax.servlet.http;version="2.4.0", + org.apache.lucene.index;core="split";version="3.5.0", + org.apache.lucene.search;core="split";version="3.5.0", + org.apache.lucene.store;core="split";version="3.5.0" Bundle-RequiredExecutionEnvironment: J2SE-1.5 Export-Package: org.eclipse.ua.tests, org.eclipse.ua.tests.browser, |