Bug 340563 - [Help][Search] Update Lucene 2.9.1 to the latest version

author: John Arthorne 2012-06-26 13:15:57 +0000
committer: John Arthorne 2012-07-24 17:19:06 +0000
commit: 6e3a775e71e0862084e1664b8fd3e4913b85f33d (patch)
tree: 1d37b119e6eb9ce12718d29b9d87682e9aac059f /org.eclipse.help.base/src
parent: 9a9baea13e41d06ec2177f4e2486794c723d45e8 (diff)
download: eclipse.platform.ua-6e3a775e71e0862084e1664b8fd3e4913b85f33d.tar.gz
eclipse.platform.ua-6e3a775e71e0862084e1664b8fd3e4913b85f33d.tar.xz
eclipse.platform.ua-6e3a775e71e0862084e1664b8fd3e4913b85f33d.zip
3 files changed, 3 insertions, 108 deletions
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java
index b5e76907d..84c3bb1af 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java
@@ -19,8 +19,7 @@ import org.eclipse.core.runtime.Platform;
 import org.eclipse.help.internal.base.HelpBasePlugin;
 
 /**
- * Lucene Analyzer. LowerCaseTokenizer->WordTokenStream (uses word breaking in
- * java.text)
+ * Lucene Analyzer. LowerCaseFilter->StandardTokenizer.
  */
 public class DefaultAnalyzer extends Analyzer {
 
@@ -81,14 +80,8 @@ public class DefaultAnalyzer extends Analyzer {
 	 * Reader.
 	 */
 	public final TokenStream tokenStream(String fieldName, Reader reader) {
-		String tokenizer = System.getProperty("help.lucene.tokenizer"); //$NON-NLS-1$
-		//support reverting to standard lucene tokenizer based on system property
-		if ("standard".equalsIgnoreCase(tokenizer)) { //$NON-NLS-1$
 			Version version = Version.LUCENE_CURRENT;
-			return new LowerCaseFilter(new StandardTokenizer(version, reader));
-		}
-		//default Eclipse tokenizer
-		return new LowerCaseFilter(new WordTokenStream(fieldName, reader, locale));
+			return new LowerCaseFilter(version, new StandardTokenizer(version, reader));
 	}
 
 	/**
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java
index 916d58a65..b583ba253 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java
@@ -106,7 +106,7 @@ public class SearchIndex implements ISearchIndex, IHelpSearchIndex {
 
 	public static final String DEPENDENCIES_KEY_ANALYZER = "analyzer"; //$NON-NLS-1$
 
-	private static final String LUCENE_BUNDLE_ID = "org.apache.lucene"; //$NON-NLS-1$
+	private static final String LUCENE_BUNDLE_ID = "org.apache.lucene.core"; //$NON-NLS-1$
 
 	private static final String FIELD_NAME = "name"; //$NON-NLS-1$
 
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java
deleted file mode 100644
index fc7ca0f64..000000000
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java
+++ /dev/null
@@ -1,98 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2000, 2011 IBM Corporation and others.
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- *     IBM Corporation - initial API and implementation
- *******************************************************************************/
-package org.eclipse.help.internal.search;
-
-import java.io.*;
-import com.ibm.icu.text.BreakIterator;
-import java.util.ArrayList;
-import java.util.Locale;
-
-import org.apache.lucene.analysis.*;
-
-/**
- * WordTokenStream obtains tokens containing words appropriate for use with
- * Lucene search engine.
- */
-public final class WordTokenStream extends TokenStream {
-	private static final int BUF_LEN = 4096;
-	private static final int TOKENS_LEN = 512;
-	private final Reader reader;
-	private final BreakIterator boundary;
-	private final ArrayList<Token> tokens;
-	private int token;
-	private int noTokens;
-	private final char[] cbuf;
-	/**
-	 * Constructor
-	 */
-	public WordTokenStream(String fieldName, Reader reader, Locale locale) {
-		this.reader = reader;
-		boundary = BreakIterator.getWordInstance(locale);
-		cbuf = new char[BUF_LEN];
-		tokens = new ArrayList<Token>(TOKENS_LEN);
-
-	}
-	/**
-	 * @see TokenStream#next()
-	 */
-	public final Token next() throws IOException {
-		while (token >= noTokens) {
-			// read BUF_LEN of chars
-			int l;
-			while ((l = reader.read(cbuf)) <= 0) {
-				if (l < 0) {
-					// EOF
-					reader.close();
-					return null;
-				}
-			}
-			StringBuffer strbuf = new StringBuffer(l + 80);
-			strbuf.append(cbuf, 0, l);
-			// read more until white space (or EOF)
-			int c;
-			while (0 <= (c = reader.read())) {
-				strbuf.append((char) c);
-				if (c == ' ' || c == '\r' || c == '\n' || c == '\t') {
-					break;
-				}
-			}
-
-			String str = strbuf.toString();
-			boundary.setText(str);
-
-			int start = boundary.first();
-			tokens.clear();
-			wordsbreak : for (int end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary
-					.next()) {
-				// determine if it is a word
-				// any letter or digit between boundaries means it is a word
-				for (int i = start; i < end; i++) {
-					if (Character.isLetterOrDigit(str.charAt(i))) {
-						// it is a word
-						tokens.add(new Token(str.substring(start, end), start,
-								end));
-						continue wordsbreak;
-					}
-				}
-			}
-
-			if (c < 0) {
-				reader.close();
-				tokens.add((Token) null);
-			}
-			noTokens = tokens.size();
-			token = 0;
-		}
-
-		return tokens.get(token++);
-
-	}
-}
author	John Arthorne	2012-06-26 13:15:57 +0000
committer	John Arthorne	2012-07-24 17:19:06 +0000
commit	6e3a775e71e0862084e1664b8fd3e4913b85f33d (patch)
tree	1d37b119e6eb9ce12718d29b9d87682e9aac059f /org.eclipse.help.base/src
parent	9a9baea13e41d06ec2177f4e2486794c723d45e8 (diff)
download	eclipse.platform.ua-6e3a775e71e0862084e1664b8fd3e4913b85f33d.tar.gz eclipse.platform.ua-6e3a775e71e0862084e1664b8fd3e4913b85f33d.tar.xz eclipse.platform.ua-6e3a775e71e0862084e1664b8fd3e4913b85f33d.zip