Skip to main content
aboutsummaryrefslogtreecommitdiffstats
blob: 1bb3a82a61e2dbde531bd334fd4e30894da25b88 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
/*******************************************************************************
 * Copyright (c) 2010, 2011 IBM Corporation and others.
 *
 * This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License 2.0
 * which accompanies this distribution, and is available at
 * https://www.eclipse.org/legal/epl-2.0/
 *
 * SPDX-License-Identifier: EPL-2.0
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 ******************************************************************************/
package org.eclipse.equinox.bidi;

import org.eclipse.equinox.bidi.advanced.*;
import org.eclipse.equinox.bidi.custom.StructuredTextTypeHandler;

/**
 * Provides methods to process bidirectional text with a specific 
 * structure. The methods in this class are the most straightforward 
 * way to add directional formatting characters to the source text to 
 * ensure correct presentation, or to remove those characters to 
 * restore the original text
 * (for more explanations, please see the 
 * {@link org.eclipse.equinox.bidi package documentation}).
 * <p>
 * This class can be used without OSGi running (but only the structured text types declared
 * in {@link StructuredTextTypeHandlerFactory} are available in that mode).
 * </p>
 *
 *  @noinstantiate This class is not intended to be instantiated by clients.
 */
public final class StructuredTextProcessor {

	/**
	 * The default set of separators used to segment a string: dot, 
	 * colon, slash, backslash.
	 */
	private static final String DEFAULT_SEPARATORS = ".:/\\"; //$NON-NLS-1$

	// left to right mark (U+200E)
	private static final char LRM = '\u200e';

	// left to right embedding (U+202A)
	private static final char LRE = '\u202a';

	// right to left embedding (U+202B)
	private static final char RLE = '\u202b';

	// pop directional format (U+202C)
	private static final char PDF = '\u202c';

	/**
	 * Prevents instantiation.
	 */
	private StructuredTextProcessor() {
		// empty
	}

	/**
	 * Processes the given (<i>lean</i>) text and returns a string with appropriate
	 * directional formatting characters (<i>full</i> text). This is equivalent to 
	 * calling {@link #process(String str, String separators)} with the default
	 * set of separators.
	 * <p>
	 * The processing adds directional formatting characters so that presentation 
	 * using the Unicode Bidirectional Algorithm will provide the expected result.
	 * The text is segmented according to the provided separators.
	 * Each segment has the Unicode Bidi Algorithm applied to it,
	 * but as a whole, the string is oriented left to right.
	 * </p><p>
	 * For example, a file path such as {@code d:\myfolder\FOLDER\MYFILE.java}
	 * (where capital letters indicate RTL text) should render as
	 * {@code d:\myfolder\REDLOF\ELIFYM.java}.
	 * </p>
	 * 
	 * @param str the <i>lean</i> text to process
	 *  
	 * @return the processed string (<i>full</i> text)
	 * 
	 * @see #deprocess(String)
	 */
	public static String process(String str) {
		return process(str, DEFAULT_SEPARATORS);
	}

	/**
	 * Processes a string that has a particular semantic meaning to render
	 * it correctly in bidi environments.
	 * For more details, see {@link #process(String)}.
	 * <p>
	 * The input is returned unchanged when it is <code>null</code>, when it
	 * has at most one character, when it already starts with LRE and ends
	 * with PDF, or when it contains no RTL character while starting with a
	 * letter and ending with a letter or a digit.
	 * </p>
	 * 
	 * @param str the <i>lean</i> text to process
	 * @param separators characters by which the string will be segmented;
	 *        if <code>null</code>, the default separators are used
	 * 
	 * @return the processed string (<i>full</i> text)
	 * 
	 * @see #deprocess(String)
	 */
	public static String process(String str, String separators) {
		if ((str == null) || (str.length() <= 1))
			return str;

		int strLength = str.length();
		char first = str.charAt(0);
		char last = str.charAt(strLength - 1);

		// do not process a string that has already been processed.
		if (first == LRE && last == PDF)
			return str;

		// do not process a string if all the following conditions are true:
		//  a) it has no RTL characters
		//  b) it starts with a LTR character
		//  c) it ends with a LTR character or a digit
		// (b) and (c) are approximated with Character.isLetter/isDigit, which
		// is sufficient here because (a) already excluded the RTL letters.
		if (!containsRtlCharacter(str) && Character.isLetter(first) && (Character.isDigit(last) || Character.isLetter(last)))
			return str;

		if (separators == null)
			separators = DEFAULT_SEPARATORS;

		// make sure that LRE/PDF are added around the string
		// (the environment is only created once we know it is needed)
		StructuredTextEnvironment env = new StructuredTextEnvironment(null, false, StructuredTextEnvironment.ORIENT_UNKNOWN);
		StructuredTextTypeHandler handler = new StructuredTextTypeHandler(separators);
		IStructuredTextExpert expert = StructuredTextExpertFactory.getStatefulExpert(handler, env);
		return expert.leanToFullText(str);
	}

	/**
	 * Tells whether the given string contains at least one character from
	 * the ranges this class treats as right-to-left: U+05D0..U+07B1
	 * (Hebrew, Arabic, Syriac and Thaana letters) and U+FB1D..U+FEFC
	 * (Hebrew and Arabic presentation forms).
	 * 
	 * @param str the string to scan, must not be <code>null</code>
	 * 
	 * @return <code>true</code> if an RTL character was found
	 */
	private static boolean containsRtlCharacter(String str) {
		for (int i = 0, length = str.length(); i < length; i++) {
			char c = str.charAt(i);
			if (((c >= 0x05d0) && (c <= 0x07b1)) || ((c >= 0xfb1d) && (c <= 0xfefc)))
				return true;
		}
		return false;
	}

	/**
	 * Processes a string that has a particular semantic meaning to render
	 * it correctly in bidi environments.
	 * For more details, see {@link #process(String)}.
	 * <p>
	 * The input is returned unchanged when it is <code>null</code>, when it
	 * has at most one character, or when it already starts with LRE or RLE
	 * and ends with PDF.
	 * </p>
	 * 
	 * @param  str the <i>lean</i> text to process.
	 * @param  textType an identifier for the structured text handler  
	 *         appropriate for the type of the text submitted.
	 *         It may be one of the identifiers defined in 
	 *         {@link StructuredTextTypeHandlerFactory} or a type handler identifier 
	 *         registered by a plug-in.
	 *         
	 * @return the processed string (<i>full</i> text).
	 * 
	 * @see #deprocessTyped
	 */
	public static String processTyped(String str, String textType) {
		if ((str == null) || (str.length() <= 1))
			return str;

		// do not process a string that has already been processed.
		char c = str.charAt(0);
		if (((c == LRE) || (c == RLE)) && str.charAt(str.length() - 1) == PDF)
			return str;

		// make sure that LRE/PDF are added around the string
		StructuredTextEnvironment env = new StructuredTextEnvironment(null, false, StructuredTextEnvironment.ORIENT_UNKNOWN);
		IStructuredTextExpert expert = StructuredTextExpertFactory.getExpert(textType, env);
		return expert.leanToFullText(str);
	}

	/**
	 * Removes directional formatting characters in the given string.
	 * <p>
	 * Every occurrence of LRM, LRE and PDF is dropped, wherever it appears
	 * in the string; all other characters are kept in their original order.
	 * A <code>null</code> string or a string of at most one character is
	 * returned as is.
	 * </p>
	 * 
	 * @param  str string with directional characters to remove (<i>full</i> text).
	 * 
	 * @return string without directional formatting characters (<i>lean</i> text).
	 */
	public static String deprocess(String str) {
		if ((str == null) || (str.length() <= 1))
			return str;

		int strLen = str.length();
		// the result can never be longer than the input
		StringBuilder buf = new StringBuilder(strLen);
		for (int i = 0; i < strLen; i++) {
			char c = str.charAt(i);
			switch (c) {
				case LRM :
				case LRE :
				case PDF :
					// directional formatting character: drop it
					break;
				default :
					buf.append(c);
			}
		}
		return buf.toString();
	}

	/**
	 * Removes directional formatting characters in the given string.
	 * <p>
	 * A <code>null</code> string or a string of at most one character is
	 * returned as is; otherwise the removal is delegated to the expert
	 * obtained for the given text type.
	 * </p>
	 * 
	 * @param  str string with directional characters to remove (<i>full</i> text).
	 * @param  textType an identifier for the structured text handler  
	 *         appropriate for the type of the text submitted.
	 *         It may be one of the identifiers defined in 
	 *         {@link StructuredTextTypeHandlerFactory} or a type handler identifier 
	 *         registered by a plug-in.
	 *         
	 * @return string without directional formatting characters (<i>lean</i> text).
	 * 
	 * @see #processTyped(String, String)
	 */
	public static String deprocessTyped(String str, String textType) {
		if ((str == null) || (str.length() <= 1))
			return str;

		// make sure that LRE/PDF are removed from the string
		StructuredTextEnvironment env = new StructuredTextEnvironment(null, false, StructuredTextEnvironment.ORIENT_UNKNOWN);
		IStructuredTextExpert expert = StructuredTextExpertFactory.getExpert(textType, env);
		return expert.fullToLeanText(str);
	}

	/**
	 * Returns a string containing all the default separator characters to be
	 * used to segment a given string.
	 * 
	 * @return string containing all separators.
	 */
	public static String getDefaultSeparators() {
		return DEFAULT_SEPARATORS;
	}

}

Back to the top