Skip to main content
summaryrefslogtreecommitdiffstats
blob: dae2a355777ca3a81d84e9f8e101b054ca069f36 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/*******************************************************************************
 * Copyright (c) 2000, 2016 IBM Corporation and others. All rights reserved. This program and the
 * accompanying materials are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *     Alexander Kurtakov - Bug 460787
 *     Sopot Cela - Bug 466829
 *******************************************************************************/
package org.eclipse.help.internal.search;

import java.util.Locale;
import java.util.StringTokenizer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.eclipse.core.runtime.Platform;
import org.eclipse.help.internal.base.HelpBasePlugin;

import com.ibm.icu.text.BreakIterator;


/**
 * Lucene Analyzer that tokenizes text with a {@link StandardTokenizer} and
 * then lower-cases the tokens with a {@link LowerCaseFilter}
 * (StandardTokenizer -> LowerCaseFilter).
 */
public final class DefaultAnalyzer extends Analyzer {

	/**
	 * Locale resolved in the constructor: the requested locale, the closest
	 * less specific locale reported by {@link BreakIterator}, or en_US.
	 */
	private final Locale locale;

	/**
	 * Creates a new analyzer using the given locale.
	 * <p>
	 * The requested locale is matched against the locales reported by
	 * {@link BreakIterator#getAvailableLocales()}, first as given, then
	 * without its variant, then with the language only. If none of them is
	 * available an error is logged and en_US is used instead.
	 *
	 * @param localeString
	 *            locale in <code>language[_country[_variant]]</code> form, or
	 *            <code>null</code> to use the platform locale
	 */
	public DefaultAnalyzer(String localeString) {
		super();
		// Create a locale object for a given locale string
		Locale userLocale = getLocale(localeString);

		// Query the supported locales here so it is done only once.
		Locale[] availableLocales = BreakIterator.getAvailableLocales();

		Locale supported = findSupported(userLocale, availableLocales);
		// getVariant() is the raw variant code; the display variant is a
		// localized name and is not meant for presence checks.
		if (supported == null && userLocale.getVariant().length() > 0) {
			// Check if the locale without variant is supported by BreakIterator
			supported = findSupported(new Locale(userLocale.getLanguage(), userLocale.getCountry()),
					availableLocales);
		}
		if (supported == null && userLocale.getCountry().length() > 0) {
			// Check if at least the language is supported by BreakIterator
			supported = findSupported(new Locale(userLocale.getLanguage(), ""), availableLocales); //$NON-NLS-1$
		}

		if (supported == null) {
			// Locale is not supported, will use en_US.
			// The message is passed to the log as-is (no placeholder
			// substitution is applied here), so the locale is concatenated in.
			String requested = localeString != null ? localeString : userLocale.toString();
			HelpBasePlugin.logError(
					"Text Analyzer could not be created for locale " //$NON-NLS-1$
							+ requested
							+ ".  An analyzer that extends org.eclipse.help.luceneAnalyzer extension point needs to be plugged in for locale " //$NON-NLS-1$
							+ requested
							+ ", or Java Virtual Machine needs to be upgraded to version with proper support for locale " //$NON-NLS-1$
							+ requested
							+ ".", //$NON-NLS-1$
					null);
			supported = new Locale("en", "US"); //$NON-NLS-1$ //$NON-NLS-2$
		}
		locale = supported;
	}

	/**
	 * Returns <code>candidate</code> if it equals one of the available
	 * locales, <code>null</code> otherwise.
	 */
	private static Locale findSupported(Locale candidate, Locale[] availableLocales) {
		for (Locale available : availableLocales) {
			if (candidate.equals(available)) {
				return candidate;
			}
		}
		return null;
	}

	/**
	 * Creates a Locale object out of a string representation.
	 * <p>
	 * A <code>null</code> argument falls back to {@link Platform#getNL()} and
	 * then to the JVM default locale. The string is split on '_' into one to
	 * three parts (language, country, variant); any other number of parts
	 * yields the JVM default locale.
	 */
	private Locale getLocale(String clientLocale) {
		if (clientLocale == null)
			clientLocale = Platform.getNL();
		if (clientLocale == null)
			clientLocale = Locale.getDefault().toString();

		// break the string into tokens to get the Locale object
		StringTokenizer locales = new StringTokenizer(clientLocale, "_"); //$NON-NLS-1$
		// countTokens() reports the tokens still remaining, so read it once
		// before consuming any of them.
		int parts = locales.countTokens();
		switch (parts) {
		case 1:
			return new Locale(locales.nextToken(), ""); //$NON-NLS-1$
		case 2: {
			String language = locales.nextToken();
			String country = locales.nextToken();
			return new Locale(language, country);
		}
		case 3: {
			String language = locales.nextToken();
			String country = locales.nextToken();
			String variant = locales.nextToken();
			return new Locale(language, country, variant);
		}
		default:
			return Locale.getDefault();
		}
	}

	/*
	 * Can't use try-with-resources because the Lucene internally reuses
	 * components. See {@link org.apache.lucene.analysis.Analyzer.ReuseStrategy}
	 */
	@SuppressWarnings("resource")
	@Override
	protected TokenStreamComponents createComponents(String fieldName) {
		// The tokenizer is the source; the lower-case filter wraps it and is
		// the stream handed out to consumers.
		Tokenizer source = new StandardTokenizer();
		LowerCaseFilter filter = new LowerCaseFilter(source);
		return new TokenStreamComponents(source, filter);
	}
}

Back to the top