diff options
author | John Arthorne | 2012-06-26 13:15:57 +0000 |
---|---|---|
committer | John Arthorne | 2012-07-24 17:19:06 +0000 |
commit | 6e3a775e71e0862084e1664b8fd3e4913b85f33d (patch) | |
tree | 1d37b119e6eb9ce12718d29b9d87682e9aac059f /org.eclipse.help.base/src | |
parent | 9a9baea13e41d06ec2177f4e2486794c723d45e8 (diff) | |
download | eclipse.platform.ua-6e3a775e71e0862084e1664b8fd3e4913b85f33d.tar.gz eclipse.platform.ua-6e3a775e71e0862084e1664b8fd3e4913b85f33d.tar.xz eclipse.platform.ua-6e3a775e71e0862084e1664b8fd3e4913b85f33d.zip |
Bug 340563 - [Help][Search] Update Lucene 2.9.1 to the latest version
Diffstat (limited to 'org.eclipse.help.base/src')
3 files changed, 3 insertions, 108 deletions
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java index b5e76907d..84c3bb1af 100644 --- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java +++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java @@ -19,8 +19,7 @@ import org.eclipse.core.runtime.Platform; import org.eclipse.help.internal.base.HelpBasePlugin; /** - * Lucene Analyzer. LowerCaseTokenizer->WordTokenStream (uses word breaking in - * java.text) + * Lucene Analyzer. LowerCaseFilter->StandardTokenizer. */ public class DefaultAnalyzer extends Analyzer { @@ -81,14 +80,8 @@ public class DefaultAnalyzer extends Analyzer { * Reader. */ public final TokenStream tokenStream(String fieldName, Reader reader) { - String tokenizer = System.getProperty("help.lucene.tokenizer"); //$NON-NLS-1$ - //support reverting to standard lucene tokenizer based on system property - if ("standard".equalsIgnoreCase(tokenizer)) { //$NON-NLS-1$ Version version = Version.LUCENE_CURRENT; - return new LowerCaseFilter(new StandardTokenizer(version, reader)); - } - //default Eclipse tokenizer - return new LowerCaseFilter(new WordTokenStream(fieldName, reader, locale)); + return new LowerCaseFilter(version, new StandardTokenizer(version, reader)); } /** diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java index 916d58a65..b583ba253 100644 --- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java +++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java @@ -106,7 +106,7 @@ public class SearchIndex implements ISearchIndex, IHelpSearchIndex { public static final String DEPENDENCIES_KEY_ANALYZER = "analyzer"; //$NON-NLS-1$ - private static final String LUCENE_BUNDLE_ID = "org.apache.lucene"; //$NON-NLS-1$ + private static final String LUCENE_BUNDLE_ID = "org.apache.lucene.core"; //$NON-NLS-1$ private static final String FIELD_NAME = "name"; //$NON-NLS-1$ diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java deleted file mode 100644 index fc7ca0f64..000000000 --- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java +++ /dev/null @@ -1,98 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2000, 2011 IBM Corporation and others. - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Public License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/legal/epl-v10.html - * - * Contributors: - * IBM Corporation - initial API and implementation - *******************************************************************************/ -package org.eclipse.help.internal.search; - -import java.io.*; -import com.ibm.icu.text.BreakIterator; -import java.util.ArrayList; -import java.util.Locale; - -import org.apache.lucene.analysis.*; - -/** - * WordTokenStream obtains tokens containing words appropriate for use with - * Lucene search engine. - */ -public final class WordTokenStream extends TokenStream { - private static final int BUF_LEN = 4096; - private static final int TOKENS_LEN = 512; - private final Reader reader; - private final BreakIterator boundary; - private final ArrayList<Token> tokens; - private int token; - private int noTokens; - private final char[] cbuf; - /** - * Constructor - */ - public WordTokenStream(String fieldName, Reader reader, Locale locale) { - this.reader = reader; - boundary = BreakIterator.getWordInstance(locale); - cbuf = new char[BUF_LEN]; - tokens = new ArrayList<Token>(TOKENS_LEN); - - } - /** - * @see TokenStream#next() - */ - public final Token next() throws IOException { - while (token >= noTokens) { - // read BUF_LEN of chars - int l; - while ((l = reader.read(cbuf)) <= 0) { - if (l < 0) { - // EOF - reader.close(); - return null; - } - } - StringBuffer strbuf = new StringBuffer(l + 80); - strbuf.append(cbuf, 0, l); - // read more until white space (or EOF) - int c; - while (0 <= (c = reader.read())) { - strbuf.append((char) c); - if (c == ' ' || c == '\r' || c == '\n' || c == '\t') { - break; - } - } - - String str = strbuf.toString(); - boundary.setText(str); - - int start = boundary.first(); - tokens.clear(); - wordsbreak : for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary - .next()) { - // determine if it is a word - // any letter or digit between boundaries means it is a word - for (int i = start; i < end; i++) { - if (Character.isLetterOrDigit(str.charAt(i))) { - // it is a word - tokens.add(new Token(str.substring(start, end), start, - end)); - continue wordsbreak; - } - } - } - - if (c < 0) { - reader.close(); - tokens.add((Token) null); - } - noTokens = tokens.size(); - token = 0; - } - - return tokens.get(token++); - - } -} |