author     Sopot Cela    2016-11-09 12:43:26 +0000
committer  Sopot Cela    2016-11-09 12:43:26 +0000
commit     ad603c7e425e44239148f16c15da24e13bab153b (patch)
tree       edc070d262d069b51938268fe3b72234fa18aab3  /org.eclipse.help.base
parent     5bf6a6d33bac1e234a0d3b153d59d3c0d6229a42 (diff)
download   eclipse.platform.ua-ad603c7e425e44239148f16c15da24e13bab153b.tar.gz
           eclipse.platform.ua-ad603c7e425e44239148f16c15da24e13bab153b.tar.xz
           eclipse.platform.ua-ad603c7e425e44239148f16c15da24e13bab153b.zip
Note that for exact-analyzer use (wildcard searches and exact searches using quotes) the result counts will not match, because of a bug in the old code. The files added under the .../indexXYZ folders are needed for the compatibility tests I added; see the PrebuiltIndexCompatibility.java changes.

Change-Id: Id18cdb387ada7f2c30eecbe59b726d3bf6dd00c1
Signed-off-by: Sopot Cela <scela@redhat.com>
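For context, the central API change driving most of the analyzer hunks below is Lucene's move from overriding Analyzer.tokenStream(String, Reader) to overriding Analyzer.createComponents(String). A minimal sketch of the new contract, assuming lucene-core and lucene-analyzers-common 6.1 on the classpath; the class name and token chain here are illustrative only, the real analyzers are in the diff:

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;

public final class ExampleAnalyzer extends Analyzer {
    // Lucene 3.x asked subclasses for tokenStream(String, Reader); 6.x asks for
    // createComponents(String) and supplies the Reader itself, reusing the built
    // components per thread (see Analyzer.ReuseStrategy).
    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer source = new StandardTokenizer();
        TokenStream result = new LowerCaseFilter(source);
        return new TokenStreamComponents(source, result);
    }
}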
Diffstat (limited to 'org.eclipse.help.base')
-rw-r--r--  org.eclipse.help.base/META-INF/MANIFEST.MF                                                |   8
-rw-r--r--  org.eclipse.help.base/plugin.xml                                                          |   8
-rw-r--r--  org.eclipse.help.base/src/org/eclipse/help/internal/search/AnalyzerFactory.java           |  29
-rw-r--r--  org.eclipse.help.base/src/org/eclipse/help/internal/search/Analyzer_en.java               |  33
-rw-r--r--  org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java           |  40
-rw-r--r--  org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java |  30
-rw-r--r--  org.eclipse.help.base/src/org/eclipse/help/internal/search/LuceneSearchDocument.java      |  23
-rw-r--r--  org.eclipse.help.base/src/org/eclipse/help/internal/search/PluginIndex.java               |  14
-rw-r--r--  org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java              |  43
-rw-r--r--  org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsExactPhrase.java     |  16
-rw-r--r--  org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsPhrase.java          |  10
-rw-r--r--  org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsToken.java           |   9
-rw-r--r--  org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java               | 133
-rw-r--r--  org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java             |  24
-rw-r--r--  org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java           | 115
15 files changed, 228 insertions, 307 deletions
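The query-construction changes below repeatedly apply the same Lucene 6 idiom: queries are immutable and assembled through builders, and boosts are applied by wrapping the built query in a BoostQuery rather than calling setBoost(). A standalone sketch of that pattern, assuming Lucene 6.1; the field names, words, and boost value are illustrative only:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

public class BoostedQuerySketch {

    // Build a boosted exact-phrase query, in the style QueryWordsExactPhrase now uses.
    static Query exactPhrase(String field, String[] words, float boost) {
        PhraseQuery.Builder phrase = new PhraseQuery.Builder();
        for (String word : words) {
            phrase.add(new Term("exact_" + field, word));
        }
        return new BoostQuery(phrase.build(), boost);
    }

    // OR several queries together, in the style QueryBuilder.orQueries now uses.
    static Query anyOf(Query... alternatives) {
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        for (Query q : alternatives) {
            builder.add(q, BooleanClause.Occur.SHOULD);
        }
        return builder.build();
    }

    public static void main(String[] args) {
        Query query = anyOf(
                new TermQuery(new Term("contents", "eclipse")),
                exactPhrase("contents", new String[] { "help", "search" }, 10f));
        System.out.println(query);
    }
}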
diff --git a/org.eclipse.help.base/META-INF/MANIFEST.MF b/org.eclipse.help.base/META-INF/MANIFEST.MF
index 1089d1a7d..ba46905dd 100644
--- a/org.eclipse.help.base/META-INF/MANIFEST.MF
+++ b/org.eclipse.help.base/META-INF/MANIFEST.MF
@@ -43,9 +43,11 @@ Require-Bundle: org.eclipse.ant.core;bundle-version="3.2.200";resolution:=option
org.eclipse.core.runtime;bundle-version="[3.11.0,4.0.0)",
org.eclipse.help;bundle-version="[3.5.0,4.0.0)";visibility:=reexport,
org.eclipse.core.expressions;bundle-version="[3.4.200,4.0.0)",
- org.apache.lucene.analysis;bundle-version="[3.5.0,4.0.0)",
- org.apache.lucene.core;bundle-version="[3.5.0,4.0.0)",
- org.eclipse.core.net;bundle-version="1.2.200"
+ org.eclipse.core.net;bundle-version="1.2.200",
+ org.apache.lucene.analyzers-common;bundle-version="6.1.0",
+ org.apache.lucene.core;bundle-version="6.1.0",
+ org.apache.lucene.misc;bundle-version="6.1.0",
+ org.apache.lucene.analyzers-smartcn;bundle-version="6.1.0"
Import-Package: com.ibm.icu.text,
org.eclipse.equinox.http.jetty;resolution:=optional
Bundle-RequiredExecutionEnvironment: JavaSE-1.8
diff --git a/org.eclipse.help.base/plugin.xml b/org.eclipse.help.base/plugin.xml
index 07a5a22d4..f21724b1a 100644
--- a/org.eclipse.help.base/plugin.xml
+++ b/org.eclipse.help.base/plugin.xml
@@ -82,10 +82,6 @@
class="org.eclipse.help.internal.search.AnalyzerFactory:ko">
</analyzer>
<analyzer
- locale="zh"
- class="org.apache.lucene.analysis.cn.ChineseAnalyzer">
- </analyzer>
- <analyzer
locale="cs"
class="org.eclipse.help.internal.search.AnalyzerFactory:cs">
</analyzer>
@@ -102,6 +98,10 @@
class="org.eclipse.help.internal.search.AnalyzerFactory:fr">
</analyzer>
<analyzer
+ class="org.apache.lucene.analysis.cn.smart.SmartChineseAnalyzer"
+ locale="zh">
+ </analyzer>
+ <analyzer
locale="nl"
class="org.eclipse.help.internal.search.AnalyzerFactory:nl">
</analyzer>
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/AnalyzerFactory.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/AnalyzerFactory.java
index 63a170864..9bd03f938 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/AnalyzerFactory.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/AnalyzerFactory.java
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright (c) 2012, 2015 IBM Corporation and others.
+ * Copyright (c) 2012, 2016 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
@@ -7,6 +7,7 @@
*
* Contributors:
* IBM Corporation - initial API and implementation
+ * Sopot Cela - Bug 466829
*******************************************************************************/
package org.eclipse.help.internal.search;
@@ -19,8 +20,9 @@ import org.apache.lucene.analysis.el.GreekAnalyzer;
import org.apache.lucene.analysis.fr.FrenchAnalyzer;
import org.apache.lucene.analysis.nl.DutchAnalyzer;
import org.apache.lucene.analysis.ru.RussianAnalyzer;
-import org.apache.lucene.util.Version;
-import org.eclipse.core.runtime.*;
+import org.eclipse.core.runtime.CoreException;
+import org.eclipse.core.runtime.IConfigurationElement;
+import org.eclipse.core.runtime.IExecutableExtension;
/**
* A factory responsible for instantiating a lucene {@link Analyzer}.
@@ -30,27 +32,26 @@ public class AnalyzerFactory implements IExecutableExtension{
public Analyzer create() {
if (locale == null)
return null;
- Version version = Version.LUCENE_35;
if ("pt".equals(locale)) //$NON-NLS-1$
- return new BrazilianAnalyzer(version);
+ return new BrazilianAnalyzer();
if ("ja".equals(locale)) //$NON-NLS-1$
- return new CJKAnalyzer(version);
+ return new CJKAnalyzer();
if ("ko".equals(locale)) //$NON-NLS-1$
- return new CJKAnalyzer(version);
+ return new CJKAnalyzer();
if ("pt".equals(locale)) //$NON-NLS-1$
- return new BrazilianAnalyzer(version);
+ return new BrazilianAnalyzer();
if ("cs".equals(locale)) //$NON-NLS-1$
- return new CzechAnalyzer(version);
+ return new CzechAnalyzer();
if ("de".equals(locale)) //$NON-NLS-1$
- return new GermanAnalyzer(version);
+ return new GermanAnalyzer();
if ("el".equals(locale)) //$NON-NLS-1$
- return new GreekAnalyzer(version);
+ return new GreekAnalyzer();
if ("fr".equals(locale)) //$NON-NLS-1$
- return new FrenchAnalyzer(version);
+ return new FrenchAnalyzer();
if ("nl".equals(locale)) //$NON-NLS-1$
- return new DutchAnalyzer(version);
+ return new DutchAnalyzer();
if ("ru".equals(locale)) //$NON-NLS-1$
- return new RussianAnalyzer(version);
+ return new RussianAnalyzer();
//unknown language
return null;
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/Analyzer_en.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/Analyzer_en.java
index 126e6c9f7..b6ccf6f16 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/Analyzer_en.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/Analyzer_en.java
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright (c) 2000, 2015 IBM Corporation and others.
+ * Copyright (c) 2000, 2016 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
@@ -8,16 +8,23 @@
* Contributors:
* IBM Corporation - initial API and implementation
* Alexander Kurtakov - Bug 460787
+ * Sopot Cela - Bug 466829
*******************************************************************************/
package org.eclipse.help.internal.search;
-import java.io.*;
+
import java.util.HashSet;
import java.util.Set;
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.util.Version;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.analysis.en.PorterStemFilter;
+import org.apache.lucene.analysis.util.CharArraySet;
+
/**
- * Lucene Analyzer for English. LowerCaseTokenizer->StopFilter->PorterStemFilter
+ * Lucene Analyzer for English.
+ * LowerCaseAndDigitsTokenizer->StopFilter->PorterStemFilter
*/
public final class Analyzer_en extends Analyzer {
/**
@@ -26,13 +33,19 @@ public final class Analyzer_en extends Analyzer {
public Analyzer_en() {
super();
}
- /**
- * Creates a TokenStream which tokenizes all the text in the provided
- * Reader.
+
+ /*
+ * Can't use try-with-resources because the Lucene internally reuses
+ * components. See {@link org.apache.lucene.analysis.Analyzer.ReuseStrategy}
*/
+ @SuppressWarnings("resource")
@Override
- public final TokenStream tokenStream(String fieldName, Reader reader) {
- return new PorterStemFilter(new StopFilter(Version.LUCENE_30, new LowerCaseAndDigitsTokenizer(reader), getStopWords(), false));
+ protected TokenStreamComponents createComponents(String fieldName) {
+ final Tokenizer source;
+ source = new LowerCaseAndDigitsTokenizer();
+ TokenStream result = new StopFilter(source, new CharArraySet(getStopWords(), false));
+ result = new PorterStemFilter(result);
+ return new TokenStreamComponents(source, result);
}
private Set<String> stopWords;
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java
index deb65411d..dae2a3557 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/DefaultAnalyzer.java
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright (c) 2000, 2015 IBM Corporation and others. All rights reserved. This program and the
+ * Copyright (c) 2000, 2016 IBM Corporation and others. All rights reserved. This program and the
* accompanying materials are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
@@ -7,27 +7,25 @@
* Contributors:
* IBM Corporation - initial API and implementation
* Alexander Kurtakov - Bug 460787
+ * Sopot Cela - Bug 466829
*******************************************************************************/
package org.eclipse.help.internal.search;
-import java.io.Reader;
import java.util.Locale;
import java.util.StringTokenizer;
-import com.ibm.icu.text.BreakIterator;
-
import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.LowerCaseFilter;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.util.Version;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.core.LowerCaseFilter;
+import org.apache.lucene.analysis.standard.StandardTokenizer;
+import org.eclipse.core.runtime.Platform;
import org.eclipse.help.internal.base.HelpBasePlugin;
-import org.eclipse.core.runtime.Platform;
+import com.ibm.icu.text.BreakIterator;
/**
- * Lucene Analyzer. LowerCaseTokenizer->WordTokenStream (uses word breaking in
- * java.text)
+ * Lucene Analyzer. LowerCaseFilter->StandardTokenizer
*/
public final class DefaultAnalyzer extends Analyzer {
@@ -84,15 +82,6 @@ public final class DefaultAnalyzer extends Analyzer {
}
/**
- * Creates a TokenStream which tokenizes all the text in the provided
- * Reader.
- */
- @Override
- public final TokenStream tokenStream(String fieldName, Reader reader) {
- return new LowerCaseFilter(Version.LUCENE_30, new WordTokenStream(fieldName, reader, locale));
- }
-
- /**
* Creates a Locale object out of a string representation
*/
private Locale getLocale(String clientLocale) {
@@ -112,4 +101,17 @@ public final class DefaultAnalyzer extends Analyzer {
else
return Locale.getDefault();
}
+
+ /*
+ * Can't use try-with-resources because the Lucene internally reuses
+ * components. See {@link org.apache.lucene.analysis.Analyzer.ReuseStrategy}
+ */
+ @SuppressWarnings("resource")
+ @Override
+ protected TokenStreamComponents createComponents(String fieldName) {
+ Tokenizer source = new StandardTokenizer();
+ LowerCaseFilter filter = new LowerCaseFilter(source);
+ TokenStreamComponents components = new TokenStreamComponents(source, filter);
+ return components;
+ }
}
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java
index a47bcbca2..4fb3de814 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/LowerCaseAndDigitsTokenizer.java
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright (c) 2000, 2015 IBM Corporation and others.
+ * Copyright (c) 2000, 2016 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
@@ -8,30 +8,26 @@
* Contributors:
* IBM Corporation - initial API and implementation
* Alexander Kurtakov - Bug 460787
+ * Sopot Cela - Bug 466829
*******************************************************************************/
package org.eclipse.help.internal.search;
-import java.io.*;
-
-import org.apache.lucene.analysis.*;
-import org.apache.lucene.util.Version;
+import org.apache.lucene.analysis.util.CharTokenizer;
/**
- * Tokenizer breaking words around letters or digits.
+ * Tokenizer breaking words around letters or digits. Also normalizes to lower
+ * case.
*/
public class LowerCaseAndDigitsTokenizer extends CharTokenizer {
- public LowerCaseAndDigitsTokenizer(Reader input) {
- super(Version.LUCENE_30, input);
- }
- @Override
- protected char normalize(char c) {
- return Character.toLowerCase(c);
- }
+ @Override
+ protected boolean isTokenChar(int c) {
+ return Character.isLetterOrDigit(c);
+ }
- @Override
- protected boolean isTokenChar(char c) {
- return Character.isLetterOrDigit(c);
- }
+ @Override
+ protected int normalize(int c) {
+ return Character.toLowerCase(c);
+ }
}
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/LuceneSearchDocument.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/LuceneSearchDocument.java
index 95480f2c6..620b189d6 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/LuceneSearchDocument.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/LuceneSearchDocument.java
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright (c) 2010, 2015 IBM Corporation and others.
+ * Copyright (c) 2010, 2016 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
@@ -7,6 +7,7 @@
*
* Contributors:
* IBM Corporation - initial API and implementation
+ * Sopot Cela - Bug 466829
*******************************************************************************/
package org.eclipse.help.internal.search;
@@ -16,6 +17,8 @@ import java.io.StringReader;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.TextField;
import org.eclipse.help.search.ISearchDocument;
/**
@@ -32,25 +35,25 @@ public class LuceneSearchDocument implements ISearchDocument {
@Override
public void setTitle(String title) {
- doc.add(new Field("title", title, Field.Store.NO, Field.Index.ANALYZED)); //$NON-NLS-1$
- doc.add(new Field("exact_title", title, Field.Store.NO, Field.Index.ANALYZED)); //$NON-NLS-1$
- doc.add(new Field("raw_title", title, Field.Store.YES, Field.Index.NO)); //$NON-NLS-1$
+ doc.add(new TextField("title", title, Field.Store.NO)); //$NON-NLS-1$
+ doc.add(new TextField("exact_title", title, Field.Store.NO)); //$NON-NLS-1$
+ doc.add(new StoredField("raw_title", title)); //$NON-NLS-1$
}
@Override
public void setSummary(String summary) {
- doc.add(new Field("summary", summary, Field.Store.YES, Field.Index.NO)); //$NON-NLS-1$
+ doc.add(new StoredField("summary", summary)); //$NON-NLS-1$
}
@Override
public void addContents(String contents) {
- doc.add(new Field("contents", new StringReader(contents))); //$NON-NLS-1$
- doc.add(new Field("exact_contents", new StringReader(contents))); //$NON-NLS-1$
+ doc.add(new TextField("contents", new StringReader(contents))); //$NON-NLS-1$
+ doc.add(new TextField("exact_contents", new StringReader(contents))); //$NON-NLS-1$
}
@Override
public void setHasFilters(boolean hasFilters) {
- doc.add(new Field("filters", Boolean.toString(hasFilters), Field.Store.YES, Field.Index.NO)); //$NON-NLS-1$
+ doc.add(new StoredField("filters", Boolean.toString(hasFilters))); //$NON-NLS-1$
}
public Document getDocument() {
@@ -59,8 +62,8 @@ public class LuceneSearchDocument implements ISearchDocument {
@Override
public void addContents(Reader contents, Reader exactContents) {
- doc.add(new Field("contents", contents)); //$NON-NLS-1$
- doc.add(new Field("exact_contents", exactContents)); //$NON-NLS-1$
+ doc.add(new TextField("contents", contents)); //$NON-NLS-1$
+ doc.add(new TextField("exact_contents", exactContents)); //$NON-NLS-1$
}
}
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/PluginIndex.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/PluginIndex.java
index f57426d4d..ddacba5a7 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/PluginIndex.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/PluginIndex.java
@@ -7,6 +7,7 @@
*
* Contributors:
* IBM Corporation - initial API and implementation
+ * Sopot Cela - Bug 466829
*******************************************************************************/
package org.eclipse.help.internal.search;
@@ -131,13 +132,12 @@ public class PluginIndex {
}
public boolean isCompatible(Bundle bundle, IPath prefixedPath) {
- URL url = FileLocator.find(bundle, prefixedPath
- .append(SearchIndex.DEPENDENCIES_VERSION_FILENAME), null);
+ URL url = FileLocator.find(bundle, prefixedPath.append(SearchIndex.DEPENDENCIES_VERSION_FILENAME), null);
if (url == null) {
- HelpBasePlugin.logError(prefixedPath
- .append(SearchIndex.DEPENDENCIES_VERSION_FILENAME)
- + " file missing from help index \"" //$NON-NLS-1$
- + path + "\" of plugin " + getPluginId(), null); //$NON-NLS-1$
+ HelpBasePlugin.logError(
+ prefixedPath.append(SearchIndex.DEPENDENCIES_VERSION_FILENAME) + " file missing from help index \"" //$NON-NLS-1$
+ + path + "\" of plugin " + getPluginId(), //$NON-NLS-1$
+ null);
return false;
}
@@ -150,6 +150,8 @@ public class PluginIndex {
.getProperty(SearchIndex.DEPENDENCIES_KEY_ANALYZER);
if (!targetIndex.isLuceneCompatible(lucene)
|| !targetIndex.isAnalyzerCompatible(analyzer)) {
+ HelpBasePlugin.logError("Error trying to consume Lucene index from bundle " + bundle.toString() //$NON-NLS-1$
+ + ". Please use an index built with Lucene 6.1 or higher.", null); //$NON-NLS-1$
return false;
}
} catch (MalformedURLException mue) {
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java
index 19fa62049..e4fd578c0 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryBuilder.java
@@ -8,6 +8,7 @@
* Contributors:
* IBM Corporation - initial API and implementation
* Chris Torrence - patch for bug Bug 107648
+ * Sopot Cela - Bug 466829
*******************************************************************************/
package org.eclipse.help.internal.search;
import java.io.*;
@@ -22,6 +23,7 @@ import org.apache.lucene.analysis.*;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
+import org.apache.lucene.search.BooleanQuery.Builder;
import org.eclipse.help.internal.base.*;
/**
* Build query acceptable by the search engine.
@@ -223,8 +225,8 @@ public class QueryBuilder {
String word = it.next();
phrase.addWord(word);
// add analyzed word to the list of words to highlight
- // if (!highlightWords.contains(word))
- // highlightWords.add(word);
+ if (!highlightWords.contains(word))
+ highlightWords.add(word);
}
// add phrase only if not empty
if (phrase.getWords().size() > 0) {
@@ -243,6 +245,7 @@ public class QueryBuilder {
private List<String> analyzeText(Analyzer analyzer, String fieldName, String text) {
List<String> words = new ArrayList<>(1);
try (Reader reader = new StringReader(text); TokenStream tStream = analyzer.tokenStream(fieldName, reader)) {
+ tStream.reset();
CharTermAttribute termAttribute = tStream.getAttribute(CharTermAttribute.class);
while (tStream.incrementToken()) {
String term = termAttribute.toString();
@@ -300,12 +303,12 @@ public class QueryBuilder {
return oredQueries;
}
private Query orQueries(Collection<Query> queries) {
- BooleanQuery bq = new BooleanQuery();
+ Builder builder = new BooleanQuery.Builder();
for (Iterator<Query> it = queries.iterator(); it.hasNext();) {
Query q = it.next();
- bq.add(q, BooleanClause.Occur.SHOULD);
+ builder.add(q, BooleanClause.Occur.SHOULD);
}
- return bq;
+ return builder.build();
}
/**
* Obtains Lucene Query for tokens containing only AND and NOT operators.
@@ -314,7 +317,7 @@ public class QueryBuilder {
*/
private Query getRequiredQuery(List<QueryWordsToken> requiredTokens, String[] fieldNames,
float[] boosts) {
- BooleanQuery retQuery = new BooleanQuery();
+ Builder retQueryBuilder = new BooleanQuery.Builder();
boolean requiredTermExist = false;
// Parse tokens left to right
QueryWordsToken operator = null;
@@ -333,22 +336,22 @@ public class QueryBuilder {
// creates the boolean query of all fields
Query q = qs[0];
if (fieldNames.length > 1) {
- BooleanQuery allFieldsQuery = new BooleanQuery();
+ Builder allFieldsQueryBuilder = new BooleanQuery.Builder();
for (int f = 0; f < fieldNames.length; f++)
- allFieldsQuery.add(qs[f], BooleanClause.Occur.SHOULD);
- q = allFieldsQuery;
+ allFieldsQueryBuilder.add(qs[f], BooleanClause.Occur.SHOULD);
+ q = allFieldsQueryBuilder.build();
}
if (operator != null && operator.type == QueryWordsToken.NOT) {
- retQuery.add(q, BooleanClause.Occur.MUST_NOT); // add as prohibited
+ retQueryBuilder.add(q, BooleanClause.Occur.MUST_NOT); // prohibited
} else {
- retQuery.add(q, BooleanClause.Occur.MUST); // add as required
+ retQueryBuilder.add(q, BooleanClause.Occur.MUST); // required
requiredTermExist = true;
}
}
if (!requiredTermExist) {
return null; // cannot search for prohibited only
}
- return retQuery;
+ return retQueryBuilder.build();
}
private Query getLuceneQuery(String[] fieldNames, float[] boosts) {
Query luceneQuery = createLuceneQuery(analyzedTokens, fieldNames,
@@ -423,20 +426,20 @@ public class QueryBuilder {
if (analyzedTokens.get(i).type != QueryWordsToken.WORD)
return query;
// Create phrase query for all tokens and OR with original query
- BooleanQuery booleanQuery = new BooleanQuery();
- booleanQuery.add(query, BooleanClause.Occur.SHOULD);
- PhraseQuery[] phraseQueries = new PhraseQuery[fields.length];
+ Builder booleanQueryBuilder = new BooleanQuery.Builder();
+ booleanQueryBuilder.add(query, BooleanClause.Occur.SHOULD);
+ PhraseQuery.Builder[] phraseQueriesBuilders = new PhraseQuery.Builder[fields.length];
for (int f = 0; f < fields.length; f++) {
- phraseQueries[f] = new PhraseQuery();
+ phraseQueriesBuilders[f] = new PhraseQuery.Builder();
for (int i = 0; i < analyzedTokens.size(); i++) {
Term t = new Term(fields[f], analyzedTokens
.get(i).value);
- phraseQueries[f].add(t);
+ phraseQueriesBuilders[f].add(t);
}
- phraseQueries[f].setBoost(10 * boosts[f]);
- booleanQuery.add(phraseQueries[f], BooleanClause.Occur.SHOULD);
+ Query boostQuery = new BoostQuery(phraseQueriesBuilders[f].build(), 10 * boosts[f]);
+ booleanQueryBuilder.add(boostQuery, BooleanClause.Occur.SHOULD);
}
- return booleanQuery;
+ return booleanQueryBuilder.build();
}
/**
* Obtains analyzed terms from query as one string. Words are double quoted,
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsExactPhrase.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsExactPhrase.java
index 5c2e02b4e..ca5ac27e8 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsExactPhrase.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsExactPhrase.java
@@ -7,14 +7,17 @@
*
* Contributors:
* IBM Corporation - initial API and implementation
+ * Sopot Cela - Bug 466829
*******************************************************************************/
package org.eclipse.help.internal.search;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
-import org.apache.lucene.index.*;
-import org.apache.lucene.search.*;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.search.BoostQuery;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
/**
* Represents a quoted token in user search query words
*/
@@ -39,13 +42,14 @@ public class QueryWordsExactPhrase extends QueryWordsToken {
*/
@Override
public Query createLuceneQuery(String field, float boost) {
- PhraseQuery q = new PhraseQuery();
+ PhraseQuery.Builder qBuilder = new PhraseQuery.Builder();
+ BoostQuery boostQuery = null;
for (Iterator<String> it = getWords().iterator(); it.hasNext();) {
String word = it.next();
Term t = new Term("exact_" + field, word); //$NON-NLS-1$
- q.add(t);
- q.setBoost(boost);
+ qBuilder.add(t);
+ boostQuery = new BoostQuery(qBuilder.build(), boost);
}
- return q;
+ return boostQuery;
}
}
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsPhrase.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsPhrase.java
index 04e0bebc1..e499f8bef 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsPhrase.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsPhrase.java
@@ -7,6 +7,7 @@
*
* Contributors:
* IBM Corporation - initial API and implementation
+ * Sopot Cela - Bug 466829
*******************************************************************************/
package org.eclipse.help.internal.search;
import java.util.ArrayList;
@@ -40,13 +41,14 @@ public class QueryWordsPhrase extends QueryWordsToken {
*/
@Override
public Query createLuceneQuery(String field, float boost) {
- PhraseQuery q = new PhraseQuery();
+ PhraseQuery.Builder qBuilder = new PhraseQuery.Builder();
+ BoostQuery boostQuery = null;
for (Iterator<String> it = getWords().iterator(); it.hasNext();) {
String word = it.next();
Term t = new Term(field, word);
- q.add(t);
- q.setBoost(boost);
+ qBuilder.add(t);
+ boostQuery = new BoostQuery(qBuilder.build(), boost);
}
- return q;
+ return boostQuery;
}
}
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsToken.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsToken.java
index 6ba76f209..ce0014a44 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsToken.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/QueryWordsToken.java
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright (c) 2000, 2007 IBM Corporation and others.
+ * Copyright (c) 2000, 2016 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
@@ -7,6 +7,7 @@
*
* Contributors:
* IBM Corporation - initial API and implementation
+ * Sopot Cela - Bug 466829
*******************************************************************************/
package org.eclipse.help.internal.search;
import org.apache.lucene.index.*;
@@ -41,16 +42,16 @@ public class QueryWordsToken {
if (questionPos == -1 && starPos == value.length() - 1) {
Term t = new Term("exact_" + field, value.substring(0, starPos)); //$NON-NLS-1$
q = new PrefixQuery(t);
- ((PrefixQuery) q).setBoost(boost);
+ q = new BoostQuery(q, boost);
} else {
Term t = new Term("exact_" + field, value); //$NON-NLS-1$
q = new WildcardQuery(t);
- ((WildcardQuery) q).setBoost(boost);
+ q = new BoostQuery(q, boost);
}
} else {
Term t = new Term(field, value);
q = new TermQuery(t);
- ((TermQuery) q).setBoost(boost);
+ q = new BoostQuery(q, boost);
}
// after updating Lucene, set boost on a Query class
return q;
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java
index d47dd2e41..86c9eea18 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/SearchIndex.java
@@ -9,6 +9,7 @@
* IBM Corporation - initial API and implementation
* Holger Voormann - fix for bug 426785 (http://eclip.se/426785)
* Alexander Kurtakov - Bug 460787
+ * Sopot Cela - Bug 466829
*******************************************************************************/
package org.eclipse.help.internal.search;
@@ -33,17 +34,24 @@ import java.util.Set;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
-import org.apache.lucene.analysis.LimitTokenCountAnalyzer;
+import org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexFormatTooOldException;
+import org.apache.lucene.index.IndexNotFoundException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.TermDocs;
-import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.IndexWriterConfig.OpenMode;
+import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
+import org.apache.lucene.index.PostingsEnum;
+import org.apache.lucene.index.SlowCompositeReaderWrapper;
+import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
@@ -170,7 +178,7 @@ public class SearchIndex implements IHelpSearchIndex {
inconsistencyFile = new File(indexDir.getParentFile(), locale + ".inconsistent"); //$NON-NLS-1$
htmlSearchParticipant = new HTMLSearchParticipant(indexDir.getAbsolutePath());
try {
- luceneDirectory = new NIOFSDirectory(indexDir);
+ luceneDirectory = new NIOFSDirectory(indexDir.toPath());
} catch (IOException e) {
}
if (!exists()) {
@@ -189,6 +197,25 @@ public class SearchIndex implements IHelpSearchIndex {
// in vm
}
}
+
+ try {
+ DirectoryReader.open(luceneDirectory);
+ } catch (IndexFormatTooOldException | IndexNotFoundException e) {
+ deleteDir(indexDir);
+ indexDir.delete();
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+
+ }
+
+ private void deleteDir(File indexDir) {
+ File[] files = indexDir.listFiles();
+ for (File file : files) {
+ if (file.isDirectory())
+ deleteDir(file);
+ file.delete();
+ }
}
/**
@@ -203,11 +230,11 @@ public class SearchIndex implements IHelpSearchIndex {
public IStatus addDocument(String name, URL url) {
try {
Document doc = new Document();
- doc.add(new Field(FIELD_NAME, name, Field.Store.YES, Field.Index.NOT_ANALYZED));
+ doc.add(new StringField(FIELD_NAME, name, Field.Store.YES));
addExtraFields(doc);
String pluginId = LocalSearchManager.getPluginId(name);
if (relativePath != null) {
- doc.add(new Field(FIELD_INDEX_ID, relativePath, Field.Store.YES, Field.Index.NOT_ANALYZED));
+ doc.add(new StringField(FIELD_INDEX_ID, relativePath, Field.Store.YES));
}
// check for the explicit search participant.
SearchParticipant participant = null;
@@ -225,9 +252,9 @@ public class SearchIndex implements IHelpSearchIndex {
String filters = doc.get("filters"); //$NON-NLS-1$
indexedDocs.put(name, filters != null ? filters : "0"); //$NON-NLS-1$
if (id != null)
- doc.add(new Field("id", id, Field.Store.YES, Field.Index.NO)); //$NON-NLS-1$
+ doc.add(new StoredField("id", id)); //$NON-NLS-1$
if (pid != null)
- doc.add(new Field("participantId", pid, Field.Store.YES, Field.Index.NO)); //$NON-NLS-1$
+ doc.add(new StoredField("participantId", pid)); //$NON-NLS-1$
iw.addDocument(doc);
}
return status;
@@ -266,6 +293,7 @@ public class SearchIndex implements IHelpSearchIndex {
/**
* Starts additions. To be called before adding documents.
*/
+ @SuppressWarnings("resource")
public synchronized boolean beginAddBatch(boolean firstOperation) {
try {
if (iw != null) {
@@ -283,7 +311,7 @@ public class SearchIndex implements IHelpSearchIndex {
indexedDocs.restore();
setInconsistent(true);
LimitTokenCountAnalyzer analyzer = new LimitTokenCountAnalyzer(analyzerDescriptor.getAnalyzer(), 1000000);
- IndexWriterConfig writerConfig = new IndexWriterConfig(org.apache.lucene.util.Version.LUCENE_31, analyzer);
+ IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
writerConfig.setOpenMode(create ? OpenMode.CREATE : OpenMode.APPEND);
LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
mergePolicy.setMergeFactor(20);
@@ -307,7 +335,7 @@ public class SearchIndex implements IHelpSearchIndex {
indexedDocs = new HelpProperties(INDEXED_DOCS_FILE, indexDir);
indexedDocs.restore();
setInconsistent(true);
- ir = IndexReader.open(luceneDirectory, false);
+ ir = DirectoryReader.open(luceneDirectory);
return true;
} catch (IOException e) {
HelpBasePlugin.logError("Exception occurred in search indexing at beginDeleteBatch.", e); //$NON-NLS-1$
@@ -323,7 +351,7 @@ public class SearchIndex implements IHelpSearchIndex {
if (ir != null) {
ir.close();
}
- ir = IndexReader.open(luceneDirectory, false);
+ ir = DirectoryReader.open(luceneDirectory);
return true;
} catch (IOException e) {
HelpBasePlugin.logError("Exception occurred in search indexing at beginDeleteBatch.", e); //$NON-NLS-1$
@@ -341,7 +369,7 @@ public class SearchIndex implements IHelpSearchIndex {
public IStatus removeDocument(String name) {
Term term = new Term(FIELD_NAME, name);
try {
- ir.deleteDocuments(term);
+ iw.deleteDocuments(term);
indexedDocs.remove(name);
} catch (IOException e) {
return new Status(IStatus.ERROR, HelpBasePlugin.PLUGIN_ID, IStatus.ERROR,
@@ -379,7 +407,7 @@ public class SearchIndex implements IHelpSearchIndex {
* know about this change. Close it so that it gets reloaded next search.
*/
if (searcher != null) {
- searcher.close();
+ searcher.getIndexReader().close();
searcher = null;
}
return true;
@@ -411,7 +439,7 @@ public class SearchIndex implements IHelpSearchIndex {
* know about this change. Close it so that it gets reloaded next search.
*/
if (searcher != null) {
- searcher.close();
+ searcher.getIndexReader().close();
searcher = null;
}
return true;
@@ -468,8 +496,8 @@ public class SearchIndex implements IHelpSearchIndex {
for (int i = 0; i < indexPaths.size(); i++) {
String indexId = indexIds.get(i);
String indexPath = indexPaths.get(i);
- try {
- dirList.add(new NIOFSDirectory(new File(indexPath)));
+ try (NIOFSDirectory dir = new NIOFSDirectory(new File(indexPath).toPath())) {
+ dirList.add(dir);
} catch (IOException ioe) {
HelpBasePlugin
.logError(
@@ -525,19 +553,15 @@ public class SearchIndex implements IHelpSearchIndex {
}
public IStatus removeDuplicates(String name, String[] index_paths) {
- TermDocs hrefDocs = null;
- TermDocs indexDocs = null;
- Term hrefTerm = new Term(FIELD_NAME, name);
- try {
+
+ try (LeafReader ar = SlowCompositeReaderWrapper.wrap(ir)) {
+ PostingsEnum hrefDocs = null;
+ PostingsEnum indexDocs = null;
+ Term hrefTerm = new Term(FIELD_NAME, name);
for (int i = 0; i < index_paths.length; i++) {
Term indexTerm = new Term(FIELD_INDEX_ID, index_paths[i]);
- if (i == 0) {
- hrefDocs = ir.termDocs(hrefTerm);
- indexDocs = ir.termDocs(indexTerm);
- } else {
- hrefDocs.seek(hrefTerm);
- indexDocs.seek(indexTerm);
- }
+ hrefDocs = ar.postings(hrefTerm);
+ indexDocs = ar.postings(indexTerm);
removeDocuments(hrefDocs, indexDocs);
}
} catch (IOException ioe) {
@@ -545,19 +569,6 @@ public class SearchIndex implements IHelpSearchIndex {
"IO exception occurred while removing duplicates of document " + name //$NON-NLS-1$
+ " from index " + indexDir.getAbsolutePath() + ".", //$NON-NLS-1$ //$NON-NLS-2$
ioe);
- } finally {
- if (hrefDocs != null) {
- try {
- hrefDocs.close();
- } catch (IOException e) {
- }
- }
- if (indexDocs != null) {
- try {
- indexDocs.close();
- } catch (IOException e) {
- }
- }
}
return Status.OK_STATUS;
}
@@ -569,33 +580,33 @@ public class SearchIndex implements IHelpSearchIndex {
* @param docs2
* @throws IOException
*/
- private void removeDocuments(TermDocs doc1, TermDocs docs2) throws IOException {
- if (!doc1.next()) {
+ private void removeDocuments(PostingsEnum doc1, PostingsEnum docs2) throws IOException {
+ if (doc1.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
return;
}
- if (!docs2.next()) {
+ if (docs2.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
return;
}
while (true) {
- if (doc1.doc() < docs2.doc()) {
- if (!doc1.skipTo(docs2.doc())) {
- if (!doc1.next()) {
+ if (doc1.docID() < docs2.docID()) {
+ if (doc1.advance(docs2.docID()) == PostingsEnum.NO_MORE_DOCS) {
+ if (doc1.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
return;
}
}
- } else if (doc1.doc() > docs2.doc()) {
- if (!docs2.skipTo(doc1.doc())) {
- if (!doc1.next()) {
+ } else if (doc1.docID() > docs2.docID()) {
+ if (docs2.advance(doc1.docID()) == PostingsEnum.NO_MORE_DOCS) {
+ if (doc1.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
return;
}
}
}
- if (doc1.doc() == docs2.doc()) {
- ir.deleteDocument(doc1.doc());
- if (!doc1.next()) {
+ if (doc1.docID() == docs2.docID()) {
+ iw.tryDeleteDocument(ir, doc1.docID());
+ if (doc1.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
return;
}
- if (!docs2.next()) {
+ if (docs2.nextDoc() == PostingsEnum.NO_MORE_DOCS) {
return;
}
}
@@ -634,7 +645,7 @@ public class SearchIndex implements IHelpSearchIndex {
if (searcher == null) {
openSearcher();
}
- TopDocs topDocs = searcher.search(luceneQuery, null, 1000);
+ TopDocs topDocs = searcher.search(luceneQuery, 1000);
collector.addHits(LocalSearchManager.asList(topDocs, searcher), highlightTerms);
}
} catch (BooleanQuery.TooManyClauses tmc) {
@@ -731,9 +742,9 @@ public class SearchIndex implements IHelpSearchIndex {
}
Version luceneVersion = new Version(luceneVersionString);
Version indexVersion = new Version(indexVersionString);
- Version v191 = new Version(1, 9, 1);
- if (indexVersion.compareTo(v191) < 0) {
- // index is older than Lucene 1.9.1
+ Version v610 = new Version(6, 1, 0);
+ if (indexVersion.compareTo(v610) < 0) {
+ // index is older than Lucene 6.1.0
return false;
}
if ( luceneVersion.compareTo(indexVersion) >= 0 ) {
@@ -801,7 +812,7 @@ public class SearchIndex implements IHelpSearchIndex {
public void openSearcher() throws IOException {
synchronized (searcherCreateLock) {
if (searcher == null) {
- searcher = new IndexSearcher(IndexReader.open(luceneDirectory, false));
+ searcher = new IndexSearcher(DirectoryReader.open(luceneDirectory));
}
}
}
@@ -819,7 +830,7 @@ public class SearchIndex implements IHelpSearchIndex {
if (searches.isEmpty()) {
if (searcher != null) {
try {
- searcher.close();
+ searcher.getIndexReader().close();
} catch (IOException ioe) {
}
}
@@ -899,7 +910,7 @@ public class SearchIndex implements IHelpSearchIndex {
private void cleanOldIndex() {
try (LimitTokenCountAnalyzer analyzer = new LimitTokenCountAnalyzer(analyzerDescriptor.getAnalyzer(), 10000);
IndexWriter cleaner = new IndexWriter(luceneDirectory,
- new IndexWriterConfig(org.apache.lucene.util.Version.LUCENE_31, analyzer)
+ new IndexWriterConfig(analyzer)
.setOpenMode(OpenMode.CREATE))) {
} catch (IOException ioe) {
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java
index 169937759..bfb5a46b9 100644
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java
+++ b/org.eclipse.help.base/src/org/eclipse/help/internal/search/SmartAnalyzer.java
@@ -1,5 +1,5 @@
/*******************************************************************************
- * Copyright (c) 2000, 2015 IBM Corporation and others.
+ * Copyright (c) 2000, 2016 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
@@ -7,18 +7,18 @@
*
* Contributors:
* IBM Corporation - initial API and implementation
+ * Sopot Cela - Bug 466829
*******************************************************************************/
package org.eclipse.help.internal.search;
-import java.io.*;
-
-import org.apache.lucene.analysis.*;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.AnalyzerWrapper;
/**
* Smart Analyzer. Chooses underlying implementation based on the field which
* text is analyzed.
*/
-public final class SmartAnalyzer extends Analyzer {
+public final class SmartAnalyzer extends AnalyzerWrapper {
Analyzer pluggedInAnalyzer;
Analyzer exactAnalyzer;
@@ -26,20 +26,16 @@ public final class SmartAnalyzer extends Analyzer {
* Constructor for SmartAnalyzer.
*/
public SmartAnalyzer(String locale, Analyzer pluggedInAnalyzer) {
- super();
+ super(PER_FIELD_REUSE_STRATEGY);
this.pluggedInAnalyzer = pluggedInAnalyzer;
this.exactAnalyzer = new DefaultAnalyzer(locale);
}
- /**
- * Creates a TokenStream which tokenizes all the text in the provided
- * Reader. Delegates to DefaultAnalyzer when field used to search for exact
- * match, and to plugged-in analyzer for other fields.
- */
+
@Override
- public final TokenStream tokenStream(String fieldName, Reader reader) {
+ public final Analyzer getWrappedAnalyzer(String fieldName) {
if (fieldName != null && fieldName.startsWith("exact_")) { //$NON-NLS-1$
- return exactAnalyzer.tokenStream(fieldName, reader);
+ return exactAnalyzer;
}
- return pluggedInAnalyzer.tokenStream(fieldName, reader);
+ return pluggedInAnalyzer;
}
}
diff --git a/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java b/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java
deleted file mode 100644
index 72d320312..000000000
--- a/org.eclipse.help.base/src/org/eclipse/help/internal/search/WordTokenStream.java
+++ /dev/null
@@ -1,115 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2000, 2015 IBM Corporation and others.
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- * IBM Corporation - initial API and implementation
- *******************************************************************************/
-package org.eclipse.help.internal.search;
-
-import com.ibm.icu.text.BreakIterator;
-import java.io.IOException;
-import java.io.Reader;
-import java.util.Locale;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Tokenizer;
-import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
-
-/**
- * WordTokenStream obtains tokens containing words appropriate for use with
- * Lucene search engine.
- */
-public final class WordTokenStream extends Tokenizer {
- private static final int BUF_LEN = 4096;
- private final Reader reader;
- private final BreakIterator boundary;
- private StringBuffer strbuf;
-
- private int start = 0;
- private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
-
- /**
- * Constructor
- */
- public WordTokenStream(String fieldName, Reader reader, Locale locale) {
- this.reader = reader;
- boundary = BreakIterator.getWordInstance(locale);
-
- }
- /**
- * @see TokenStream#incrementToken()
- */
- @Override
- public boolean incrementToken() throws IOException {
- clearAttributes();
- int length = 0;
- char[] buffer = termAtt.buffer();
-
- int end;
- if(strbuf == null) {
- int available;
- char[] cbuf = new char[BUF_LEN];
- while ((available = reader.read(cbuf)) <= 0) {
- if (available < 0) {
- reader.close();
- return false;
- }
- }
- strbuf = new StringBuffer(available + 80);
- strbuf.append(cbuf, 0, available);
- // read more until white space (or EOF)
- int c;
- while (0 <= (c = reader.read())) {
- strbuf.append((char) c);
- if (c == ' ' || c == '\r' || c == '\n' || c == '\t') {
- break;
- }
- }
-
- if (c < 0) {
- reader.close();
- }
-
- boundary.setText(strbuf.toString());
- start = boundary.first();
- }
- else {
- start = boundary.next();
- }
-
- for (end = boundary.next(); end != BreakIterator.DONE; start = end, end = boundary.next()) {
- // determine if it is a word
- // any letter or digit between boundaries means it is a word
- for (int i = start; i < end; i++) {
- if (Character.isLetterOrDigit(strbuf.charAt(i))) {
- // it is a word
- length = end - start;
- if (length >= buffer.length-1)
- buffer = termAtt.resizeBuffer(2+length);
- termAtt.setLength(length);
- strbuf.getChars(start, end, buffer, 0);
- return true;
- }
- }
- }
-
- return false;
- }
-
- @Override
- public void reset() throws IOException {
- super.reset();
- clearAttributes();
- }
-
- @Override
- public void close() throws IOException {
- /// Unlikely to be called as this is a reused
- if (this.reader != null) {
- this.reader.close();
- }
- }
-}
