Skip to main content
summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author John Arthorne 2012-06-26 13:15:57 +0000
committer John Arthorne 2012-07-24 17:19:06 +0000
commit 6e3a775e71e0862084e1664b8fd3e4913b85f33d (patch)
tree 1d37b119e6eb9ce12718d29b9d87682e9aac059f /org.eclipse.help.base/src
parent 9a9baea13e41d06ec2177f4e2486794c723d45e8 (diff)
download eclipse.platform.ua-6e3a775e71e0862084e1664b8fd3e4913b85f33d.tar.gz
eclipse.platform.ua-6e3a775e71e0862084e1664b8fd3e4913b85f33d.tar.xz
eclipse.platform.ua-6e3a775e71e0862084e1664b8fd3e4913b85f33d.zip
Bug 340563 - [Help][Search] Update Lucene 2.9.1 to the latest version
Diffstat (limited to 'org.eclipse.help.base/src')
-rw-r--r-- org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java 11
-rw-r--r-- org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java 2
-rw-r--r-- org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java 98
3 files changed, 3 insertions, 108 deletions
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java
index b5e76907d..84c3bb1af 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java
@@ -19,8 +19,7 @@ import org.eclipse.core.runtime.Platform;
import org.eclipse.help.internal.base.HelpBasePlugin;
/**
- * Lucene Analyzer. LowerCaseTokenizer->WordTokenStream (uses word breaking in
- * java.text)
+ * Lucene Analyzer. LowerCaseFilter->StandardTokenizer.
*/
public class DefaultAnalyzer extends Analyzer {
@@ -81,14 +80,8 @@ public class DefaultAnalyzer extends Analyzer {
* Reader.
*/
public final TokenStream tokenStream(String fieldName, Reader reader) {
- String tokenizer = System.getProperty("help.lucene.tokenizer"); //$NON-NLS-1$
- //support reverting to standard lucene tokenizer based on system property
- if ("standard".equalsIgnoreCase(tokenizer)) { //$NON-NLS-1$
Version version = Version.LUCENE_CURRENT;
- return new LowerCaseFilter(new StandardTokenizer(version, reader));
- }
- //default Eclipse tokenizer
- return new LowerCaseFilter(new WordTokenStream(fieldName, reader, locale));
+ return new LowerCaseFilter(version, new StandardTokenizer(version, reader));
}
/**
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java
index 916d58a65..b583ba253 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java
@@ -106,7 +106,7 @@ public class SearchIndex implements ISearchIndex, IHelpSearchIndex {
public static final String DEPENDENCIES_KEY_ANALYZER = "analyzer"; //$NON-NLS-1$
- private static final String LUCENE_BUNDLE_ID = "org.apache.lucene"; //$NON-NLS-1$
+ private static final String LUCENE_BUNDLE_ID = "org.apache.lucene.core"; //$NON-NLS-1$
private static final String FIELD_NAME = "name"; //$NON-NLS-1$
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java
deleted file mode 100644
index fc7ca0f64..000000000
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2000, 2011 IBM Corporation and others.
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- * IBM Corporation - initial API and implementation
- *******************************************************************************/
-package org.eclipse.help.internal.search;
-
-import java.io.*;
-import com.ibm.icu.text.BreakIterator;
-import java.util.ArrayList;
-import java.util.Locale;
-
-import org.apache.lucene.analysis.*;
-
-/**
- * WordTokenStream obtains tokens containing words appropriate for use with
- * Lucene search engine.
- */
-public final class WordTokenStream extends TokenStream {
- private static final int BUF_LEN = 4096;
- private static final int TOKENS_LEN = 512;
- private final Reader reader;
- private final BreakIterator boundary;
- private final ArrayList<Token> tokens;
- private int token;
- private int noTokens;
- private final char[] cbuf;
- /**
- * Constructor
- */
- public WordTokenStream(String fieldName, Reader reader, Locale locale) {
- this.reader = reader;
- boundary = BreakIterator.getWordInstance(locale);
- cbuf = new char[BUF_LEN];
- tokens = new ArrayList<Token>(TOKENS_LEN);
-
- }
- /**
- * @see TokenStream#next()
- */
- public final Token next() throws IOException {
- while (token >= noTokens) {
- // read BUF_LEN of chars
- int l;
- while ((l = reader.read(cbuf)) <= 0) {
- if (l < 0) {
- // EOF
- reader.close();
- return null;
- }
- }
- StringBuffer strbuf = new StringBuffer(l + 80);
- strbuf.append(cbuf, 0, l);
- // read more until white space (or EOF)
- int c;
- while (0 <= (c = reader.read())) {
- strbuf.append((char) c);
- if (c == ' ' || c == '\r' || c == '\n' || c == '\t') {
- break;
- }
- }
-
- String str = strbuf.toString();
- boundary.setText(str);
-
- int start = boundary.first();
- tokens.clear();
- wordsbreak : for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary
- .next()) {
- // determine if it is a word
- // any letter or digit between boundaries means it is a word
- for (int i = start; i < end; i++) {
- if (Character.isLetterOrDigit(str.charAt(i))) {
- // it is a word
- tokens.add(new Token(str.substring(start, end), start,
- end));
- continue wordsbreak;
- }
- }
- }
-
- if (c < 0) {
- reader.close();
- tokens.add((Token) null);
- }
- noTokens = tokens.size();
- token = 0;
- }
-
- return tokens.get(token++);
-
- }
-}

Back to the top