1 files changed, 545 insertions, 0 deletions
diff --git a/bundles/org.eclipse.equinox.bidi/src/org/eclipse/equinox/bidi/internal/StructuredTextImpl.java b/bundles/org.eclipse.equinox.bidi/src/org/eclipse/equinox/bidi/internal/StructuredTextImpl.java
new file mode 100644
index 000000000..790395a76
--- /dev/null
+++ b/bundles/org.eclipse.equinox.bidi/src/org/eclipse/equinox/bidi/internal/StructuredTextImpl.java
@@ -0,0 +1,545 @@
+/*******************************************************************************
+ * Copyright (c) 2010, 2011 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *     IBM Corporation - initial API and implementation
+ ******************************************************************************/
+package org.eclipse.equinox.bidi.internal;
+
+import org.eclipse.equinox.bidi.advanced.*;
+import org.eclipse.equinox.bidi.custom.*;
+
+/**
+ * Implementation for IStructuredTextExpert.
+ */
+public class StructuredTextImpl implements IStructuredTextExpert {
+
+	/** Returned when a conversion yields no characters. */
+	static final String EMPTY_STRING = ""; //$NON-NLS-1$
+
+	// Bidi categories used below, as defined in the Unicode Bidirectional
+	// Algorithm ( http://www.unicode.org/reports/tr9/ ):
+	//   B  - Block Separator
+	//   L  - Left to Right character
+	//   R  - Right to Left character
+	//   AL - Arabic Letter
+	//   AN - Arabic Number
+	//   EN - European Number
+	static final byte B = Character.DIRECTIONALITY_PARAGRAPH_SEPARATOR;
+	static final byte L = Character.DIRECTIONALITY_LEFT_TO_RIGHT;
+	static final byte R = Character.DIRECTIONALITY_RIGHT_TO_LEFT;
+	static final byte AL = Character.DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC;
+	static final byte AN = Character.DIRECTIONALITY_ARABIC_NUMBER;
+	static final byte EN = Character.DIRECTIONALITY_EUROPEAN_NUMBER;
+
+	// Directional formatting characters inserted into / removed from full text.
+	static final char LRM = '\u200E';
+	static final char RLM = '\u200F';
+	static final char LRE = '\u202A';
+	static final char RLE = '\u202B';
+	static final char PDF = '\u202C';
+	// Both arrays are indexed by a direction value (see insertMarks and
+	// fullToLeanText): the first entry is the LTR character, the second the RTL one.
+	static final char[] MARKS = {LRM, RLM};
+	static final char[] EMBEDS = {LRE, RLE};
+	// A full prefix is embed + mark, a full suffix is mark + PDF.
+	static final int PREFIX_LENGTH = 2;
+	static final int SUFFIX_LENGTH = 2;
+	static final int FIXES_LENGTH = PREFIX_LENGTH + SUFFIX_LENGTH;
+	static final int[] EMPTY_INT_ARRAY = {};
+
+	/**
+	 * The structured text handler utilized by this expert.
+	 * Assigned once in the constructor and returned by {@link #getTypeHandler()}.
+	 */
+	protected final StructuredTextTypeHandler handler;
+	/**
+	 * The environment associated with the expert.
+	 * Assigned once in the constructor and returned by {@link #getEnvironment()}.
+	 */
+	protected final StructuredTextEnvironment environment;
+	/**
+	 * Flag which is true if the expert is stateful.
+	 * {@link #setState} and {@link #clearState} only modify {@link #state}
+	 * when this flag is true.
+	 */
+	protected final boolean sharedExpert;
+	/**
+	 * Last state value set by {@link #setState} or {@link #clearState}.
+	 * Within this class it is only assigned when {@link #sharedExpert} is true.
+	 */
+	protected Object state;
+
+	/**
+	 * Creates an expert bound to a type handler and an environment.
+	 * This constructor is used in {@link StructuredTextExpertFactory}.
+	 * 
+	 * @param structuredTextHandler the structured text handler used by this expert.
+	 * @param environment the environment associated with this expert.
+	 * @param shared flag which is true if the expert is stateful.
+	 */
+	public StructuredTextImpl(StructuredTextTypeHandler structuredTextHandler, StructuredTextEnvironment environment, boolean shared) {
+		this.sharedExpert = shared;
+		this.environment = environment;
+		this.handler = structuredTextHandler;
+	}
+
+	/**
+	 * Returns the structured text handler this expert was created with.
+	 */
+	public StructuredTextTypeHandler getTypeHandler() {
+		return this.handler;
+	}
+
+	/**
+	 * Returns the environment this expert was created with.
+	 */
+	public StructuredTextEnvironment getEnvironment() {
+		return this.environment;
+	}
+
+	public int getTextDirection(String text) {
+		return handler.getDirection(this, text);
+	}
+
+	/**
+	 * Resets the state to <code>null</code>. Does nothing unless the
+	 * <code>sharedExpert</code> flag is set.
+	 */
+	public void clearState() {
+		if (!sharedExpert) {
+			return;
+		}
+		this.state = null;
+	}
+
+	/**
+	 * Records a new state value. Does nothing unless the
+	 * <code>sharedExpert</code> flag is set.
+	 */
+	public void setState(Object newState) {
+		if (!sharedExpert) {
+			return;
+		}
+		this.state = newState;
+	}
+
+	/**
+	 * Returns the current state value, which may be <code>null</code>.
+	 */
+	public Object getState() {
+		return this.state;
+	}
+
+	long computeNextLocation(String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int[] locations, int curPos) {
+		String separators = handler.getSeparators(this);
+		int separCount = separators.length();
+		int specialsCount = handler.getSpecialsCount(this);
+		int len = text.length();
+		int nextLocation = len;
+		int idxLocation = 0;
+		// Start with special sequences to give them precedence over simple
+		// separators. This may apply to cases like slash+asterisk versus slash.
+		for (int i = 0; i < specialsCount; i++) {
+			int location = locations[separCount + i];
+			if (location < curPos) {
+				location = handler.indexOfSpecial(this, text, charTypes, offsets, i + 1, curPos);
+				if (location < 0)
+					location = len;
+				locations[separCount + i] = location;
+			}
+			if (location < nextLocation) {
+				nextLocation = location;
+				idxLocation = separCount + i;
+			}
+		}
+		for (int i = 0; i < separCount; i++) {
+			int location = locations[i];
+			if (location < curPos) {
+				location = text.indexOf(separators.charAt(i), curPos);
+				if (location < 0)
+					location = len;
+				locations[i] = location;
+			}
+			if (location < nextLocation) {
+				nextLocation = location;
+				idxLocation = i;
+			}
+		}
+		return nextLocation + (((long) idxLocation) << 32);
+	}
+
+	/**
+	 * Decides whether a directional mark is needed before the separator at
+	 * <code>separLocation</code> and, if so, records that offset.
+	 * <p>
+	 * The text is scanned backwards from the separator for a character whose
+	 * bidi type conflicts with the base direction, then forwards from the
+	 * separator for a character which would make the separator render out
+	 * of place.
+	 *
+	 * @see StructuredTextTypeHandler#processSeparator StructuredTextTypeHandler.processSeparator
+	 */
+	static public void processSeparator(String text, StructuredTextCharTypes charTypes, StructuredTextOffsets offsets, int separLocation) {
+		final int textLength = text.length();
+		final boolean rtlBase = charTypes.getDirection() == DIR_RTL;
+
+		if (rtlBase) {
+			for (int before = separLocation - 1; before >= 0; before--) {
+				final byte typeBefore = charTypes.getBidiTypeAt(before);
+				if (typeBefore == R || typeBefore == AL) {
+					return; // preceding text already agrees with the base direction
+				}
+				if (typeBefore != L) {
+					continue;
+				}
+				// strong L before the separator: look at what follows it
+				for (int after = separLocation; after < textLength; after++) {
+					final byte typeAfter = charTypes.getBidiTypeAt(after);
+					if (typeAfter == R || typeAfter == AL) {
+						return;
+					}
+					if (typeAfter == L || typeAfter == EN) {
+						offsets.insertOffset(charTypes, separLocation);
+						return;
+					}
+				}
+				return;
+			}
+			return;
+		}
+
+		// the structured text base direction is LTR
+		boolean numberScanDone = false;
+		for (int before = separLocation - 1; before >= 0; before--) {
+			final byte typeBefore = charTypes.getBidiTypeAt(before);
+			if (typeBefore == L) {
+				return;
+			}
+			final boolean strongRtlBefore = typeBefore == R || typeBefore == AL;
+			if (!strongRtlBefore && (typeBefore != AN || numberScanDone)) {
+				continue;
+			}
+			// Either a strong RTL character or the first Arabic number precedes
+			// the separator. A following European number only matters in the
+			// strong RTL case.
+			for (int after = separLocation; after < textLength; after++) {
+				final byte typeAfter = charTypes.getBidiTypeAt(after);
+				if (typeAfter == L) {
+					return;
+				}
+				if (typeAfter == R || typeAfter == AL || typeAfter == AN || (strongRtlBefore && typeAfter == EN)) {
+					offsets.insertOffset(charTypes, separLocation);
+					return;
+				}
+			}
+			if (strongRtlBefore) {
+				return;
+			}
+			// the forward scan for an Arabic number is performed only once
+			numberScanDone = true;
+		}
+	}
+
+	/**
+	 *  Adds directional formatting characters to a <i>lean</i> text and
+	 *  returns the resulting <i>full</i> text. An empty text is returned
+	 *  unchanged.
+	 *  <p>
+	 *  Besides the marks inserted within the text, a prefix and a suffix
+	 *  may be added depending on the orientation of the environment:
+	 *  <ul>
+	 *  <li><code>ORIENT_LTR</code> with a RTL base direction:
+	 *      RLE+RLM at the head of the <i>full</i> text and RLM+PDF at its
+	 *      end.</li>
+	 *  <li><code>ORIENT_RTL</code> with a LTR base direction:
+	 *      LRE+LRM at the head of the <i>full</i> text and LRM+PDF at its
+	 *      end.</li>
+	 *  <li><code>ORIENT_CONTEXTUAL_LTR</code> or
+	 *      <code>ORIENT_CONTEXTUAL_RTL</code> when the data content would
+	 *      resolve to a RTL orientation while the structured text has a LTR
+	 *      base direction: LRM at the head of the <i>full</i> text.</li>
+	 *  <li><code>ORIENT_CONTEXTUAL_LTR</code> or
+	 *      <code>ORIENT_CONTEXTUAL_RTL</code> when the data content would
+	 *      resolve to a LTR orientation while the structured text has a RTL
+	 *      base direction: RLM at the head of the <i>full</i> text.</li>
+	 *  <li><code>ORIENT_UNKNOWN</code> with a LTR base direction:
+	 *      LRE+LRM at the head of the <i>full</i> text and LRM+PDF at its
+	 *      end.</li>
+	 *  <li><code>ORIENT_UNKNOWN</code> with a RTL base direction:
+	 *      RLE+RLM at the head of the <i>full</i> text and RLM+PDF at its
+	 *      end.</li>
+	 *  <li><code>ORIENT_IGNORE</code>: no directional formatting characters
+	 *      are added as either prefix or suffix of the <i>full</i> text.</li>
+	 *  </ul>
+	 */
+	public String leanToFullText(String text) {
+		if (text.length() == 0) {
+			return text;
+		}
+		final StructuredTextCharTypes types = new StructuredTextCharTypes(this, text);
+		final StructuredTextOffsets markOffsets = leanToFullCommon(text, types);
+		final int affixLength = markOffsets.getPrefixLength();
+		final int baseDirection = types.getDirection();
+		final int[] positions = markOffsets.getOffsets();
+		return insertMarks(text, positions, baseDirection, affixLength);
+	}
+
+	/**
+	 * Maps each character position of the <i>lean</i> text to its position
+	 * in the corresponding <i>full</i> text.
+	 *
+	 * @return an array with one entry per character of <code>text</code>;
+	 *         an empty array if <code>text</code> is empty.
+	 */
+	public int[] leanToFullMap(String text) {
+		final int leanLength = text.length();
+		if (leanLength == 0) {
+			return EMPTY_INT_ARRAY;
+		}
+		final StructuredTextCharTypes types = new StructuredTextCharTypes(this, text);
+		final StructuredTextOffsets markOffsets = leanToFullCommon(text, types);
+		// characters added so far: starts with the prefix, grows with each mark
+		int shift = markOffsets.getPrefixLength();
+		final int[] leanToFull = new int[leanLength];
+		final int markCount = markOffsets.getCount(); // number of used entries
+		int nextMark = 0;
+		for (int leanPos = 0; leanPos < leanLength; leanPos++) {
+			if (nextMark < markCount && markOffsets.getOffset(nextMark) == leanPos) {
+				nextMark++;
+				shift++;
+			}
+			leanToFull[leanPos] = leanPos + shift;
+		}
+		return leanToFull;
+	}
+
+	/**
+	 * Returns the offsets within the <i>lean</i> text at which directional
+	 * marks would be inserted; an empty array if <code>text</code> is empty.
+	 */
+	public int[] leanBidiCharOffsets(String text) {
+		if (text.length() == 0) {
+			return EMPTY_INT_ARRAY;
+		}
+		final StructuredTextCharTypes types = new StructuredTextCharTypes(this, text);
+		return leanToFullCommon(text, types).getOffsets();
+	}
+
+	/**
+	 * Common worker for the lean-to-full conversions: computes the offsets
+	 * where marks must be inserted in <code>text</code> and the length of the
+	 * prefix to add.
+	 * <p>
+	 * Unless the handler asks to skip processing, the text is walked from
+	 * separator/special case to separator/special case using
+	 * {@link #computeNextLocation}; separators are handled by
+	 * {@link #processSeparator} and special cases by the handler.
+	 * <p>
+	 * The prefix length stored in the result is 0 (no prefix), 1 (a single
+	 * mark, for contextual orientations) or 2 (embed + mark).
+	 *
+	 * @param text the <i>lean</i> text.
+	 * @param charTypes the char types object created for <code>text</code>.
+	 * @return the offsets of the marks to insert, with the prefix length set.
+	 */
+	private StructuredTextOffsets leanToFullCommon(String text, StructuredTextCharTypes charTypes) {
+		int len = text.length();
+		int direction = handler.getDirection(this, text, charTypes);
+		StructuredTextOffsets offsets = new StructuredTextOffsets();
+		if (!handler.skipProcessing(this, text, charTypes)) {
+			// initialize locations: one slot per separator followed by one slot
+			// per special case; -1 forces a search on first use
+			int separCount = handler.getSeparators(this).length();
+			int[] locations = new int[separCount + handler.getSpecialsCount(this)];
+			java.util.Arrays.fill(locations, -1);
+			// current position
+			int curPos = 0;
+			if (state != null) {
+				// a pending state is handed to the handler as case 0 at location -1
+				curPos = handler.processSpecial(this, text, charTypes, offsets, 0, -1);
+			}
+			while (true) {
+				long res = computeNextLocation(text, charTypes, offsets, locations, curPos);
+				// location of next token to handle (low word); the mask must be a
+				// long literal, an int literal 0xFFFFFFFF would be sign-extended
+				int nextLocation = (int) (res & 0xFFFFFFFFL);
+				if (nextLocation >= len)
+					break;
+				// index of next token to handle (high word); if < separCount,
+				// this is a separator, otherwise a special case
+				int idxLocation = (int) (res >> 32);
+				if (idxLocation < separCount) {
+					processSeparator(text, charTypes, offsets, nextLocation);
+					curPos = nextLocation + 1;
+				} else {
+					idxLocation -= (separCount - 1); // because caseNumber starts from 1
+					curPos = handler.processSpecial(this, text, charTypes, offsets, idxLocation, nextLocation);
+				}
+				if (curPos >= len)
+					break;
+			} // end while
+		} // end if (!handler.skipProcessing())
+		int prefixLength;
+		int orientation = environment.getOrientation();
+		if (orientation == StructuredTextEnvironment.ORIENT_IGNORE)
+			prefixLength = 0;
+		else {
+			int resolvedOrientation = charTypes.resolveOrientation();
+			if (orientation != StructuredTextEnvironment.ORIENT_UNKNOWN && resolvedOrientation == direction)
+				prefixLength = 0; // orientation and base direction agree
+			else if ((orientation & StructuredTextEnvironment.ORIENT_CONTEXTUAL) != 0)
+				prefixLength = 1; // contextual orientation: a single leading mark
+			else
+				prefixLength = 2; // embed + mark (with a matching mark + PDF suffix)
+		}
+		offsets.setPrefixLength(prefixLength);
+		return offsets;
+	}
+
+	/**
+	 * Removes directional formatting characters from a <i>full</i> text and
+	 * returns the corresponding <i>lean</i> text.
+	 * <p>
+	 * Leading embeds/marks and trailing marks/PDFs are stripped, then all
+	 * remaining marks of the current direction are removed to obtain a
+	 * candidate <i>lean</i> text. The candidate is converted back with
+	 * {@link #leanToFullText}; if the outcome (without prefix and suffix)
+	 * equals the stripped <i>full</i> text, the candidate is returned.
+	 * Otherwise, marks which appear in the stripped <i>full</i> text but
+	 * would not be regenerated are kept in the result (successive marks are
+	 * collapsed into one).
+	 *
+	 * @param full the <i>full</i> text; an empty text is returned unchanged.
+	 * @return the <i>lean</i> text, or an empty string if <code>full</code>
+	 *         contains only formatting characters.
+	 * @throws IllegalStateException if the stripped <i>full</i> text and the
+	 *         regenerated one differ by anything else than marks.
+	 */
+	public String fullToLeanText(String full) {
+		if (full.length() == 0)
+			return full;
+		int dir = handler.getDirection(this, full);
+		char curMark = MARKS[dir];
+		char curEmbed = EMBEDS[dir];
+		int i; // used as loop index
+		// remove any prefix and leading mark
+		int lenFull = full.length();
+		for (i = 0; i < lenFull; i++) {
+			char c = full.charAt(i);
+			if (c != curEmbed && c != curMark)
+				break;
+		}
+		if (i > 0) { // found at least one prefix or leading mark
+			full = full.substring(i);
+			lenFull = full.length();
+		}
+		// remove any suffix and trailing mark
+		for (i = lenFull - 1; i >= 0; i--) {
+			char c = full.charAt(i);
+			if (c != PDF && c != curMark)
+				break;
+		}
+		if (i < 0) // only suffix and trailing marks, no real data
+			return EMPTY_STRING;
+		if (i < (lenFull - 1)) { // found at least one suffix or trailing mark
+			full = full.substring(0, i + 1);
+			lenFull = full.length();
+		}
+		char[] chars = full.toCharArray();
+		// remove marks from chars, compacting the remaining characters in place
+		int cnt = 0;
+		for (i = 0; i < lenFull; i++) {
+			char c = chars[i];
+			if (c == curMark)
+				cnt++;
+			else if (cnt > 0)
+				chars[i - cnt] = c;
+		}
+		String lean = new String(chars, 0, lenFull - cnt);
+		String full2 = leanToFullText(lean);
+		// strip prefix and suffix
+		int beginIndex = 0, endIndex = full2.length();
+		if (full2.charAt(0) == curMark)
+			beginIndex = 1;
+		else {
+			if (full2.charAt(0) == curEmbed) {
+				beginIndex = 1;
+				// the embed is followed by a mark: it is the second character
+				// which must be checked, not the first one again
+				if (full2.length() > 1 && full2.charAt(1) == curMark)
+					beginIndex = 2;
+			}
+			if (full2.charAt(endIndex - 1) == PDF) {
+				endIndex--;
+				if (full2.charAt(endIndex - 1) == curMark)
+					endIndex--;
+			}
+		}
+		if (beginIndex > 0 || endIndex < full2.length())
+			full2 = full2.substring(beginIndex, endIndex);
+		if (full2.equals(full))
+			return lean;
+
+		// There are some marks in full which are not in full2 and/or vice versa.
+		// We need to add to lean any mark appearing in full and not in full2.
+		// The completed lean can never be longer than full itself.
+		char[] newChars = new char[lenFull];
+		char cFull, cFull2;
+		int idxFull, idxFull2, idxLean, newCharsPos;
+		int lenFull2 = full2.length();
+		idxFull = idxFull2 = idxLean = newCharsPos = 0;
+		while (idxFull < lenFull && idxFull2 < lenFull2) {
+			cFull2 = full2.charAt(idxFull2);
+			cFull = full.charAt(idxFull);
+			if (cFull2 == cFull) { /* chars are equal, proceed */
+				// a mark present on both sides is regenerated anyway, so it
+				// is not copied into lean
+				if (cFull2 != curMark)
+					newChars[newCharsPos++] = chars[idxLean++];
+				idxFull++;
+				idxFull2++;
+				continue;
+			}
+			if (cFull2 == curMark) { /* extra Mark in full2 text */
+				idxFull2++;
+				continue;
+			}
+			if (cFull == curMark) { /* extra Mark in source full text */
+				idxFull++;
+				// idxFull-2 always >= 0 since leading Marks were removed from full
+				if (full.charAt(idxFull - 2) == curMark)
+					continue; // ignore successive Marks in full after the first one
+				newChars[newCharsPos++] = curMark;
+				continue;
+			}
+			// we should never get here (extra char which is not a Mark)
+			throw new IllegalStateException("Internal error: extra character not a Mark."); //$NON-NLS-1$
+		}
+		// full2 ended before full - this should never happen since
+		// we removed all marks and PDFs at the end of full
+		if (idxFull < lenFull)
+			throw new IllegalStateException("Internal error: unexpected EOL."); //$NON-NLS-1$
+
+		lean = new String(newChars, 0, newCharsPos);
+		return lean;
+	}
+
+	/**
+	 * Maps each character position of the <i>full</i> text to its position
+	 * in the corresponding <i>lean</i> text.
+	 *
+	 * @return an array with one entry per character of <code>full</code>;
+	 *         characters which have no counterpart in the <i>lean</i> text
+	 *         are mapped to -1. An empty array if <code>full</code> is empty.
+	 */
+	public int[] fullToLeanMap(String full) {
+		final int fullLength = full.length();
+		if (fullLength == 0) {
+			return EMPTY_INT_ARRAY;
+		}
+		final String lean = fullToLeanText(full);
+		final int leanLength = lean.length();
+		final int dir = handler.getDirection(this, lean);
+		final char mark = MARKS[dir];
+		final char embed = EMBEDS[dir];
+		final int[] fullToLean = new int[fullLength];
+
+		// skip any prefix and leading mark
+		int fullPos = 0;
+		while (fullPos < fullLength) {
+			final char c = full.charAt(fullPos);
+			if (c != embed && c != mark) {
+				break;
+			}
+			fullToLean[fullPos++] = -1;
+		}
+		// lean must be a subset of full, so only the lean position is bounded here
+		int leanPos = 0;
+		while (leanPos < leanLength) {
+			final boolean sameChar = full.charAt(fullPos) == lean.charAt(leanPos);
+			fullToLean[fullPos] = sameChar ? leanPos++ : -1;
+			fullPos++;
+		}
+		// whatever remains in full was added after the last lean character
+		while (fullPos < fullLength) {
+			fullToLean[fullPos++] = -1;
+		}
+		return fullToLean;
+	}
+
+	public int[] fullBidiCharOffsets(String full) {
+		int lenFull = full.length();
+		if (lenFull == 0)
+			return EMPTY_INT_ARRAY;
+		String lean = fullToLeanText(full);
+		StructuredTextOffsets offsets = new StructuredTextOffsets();
+		int lenLean = lean.length();
+		int idxLean, idxFull;
+		// lean must be a subset of Full, so we only check on iLean < leanLen
+		for (idxLean = idxFull = 0; idxLean < lenLean; idxFull++) {
+			if (full.charAt(idxFull) == lean.charAt(idxLean))
+				idxLean++;
+			else
+				offsets.insertOffset(null, idxFull);
+		}
+		for (; idxFull < lenFull; idxFull++)
+			offsets.insertOffset(null, idxFull);
+		return offsets.getOffsets();
+	}
+
+	public String insertMarks(String text, int[] offsets, int direction, int affixLength) {
+		if (direction != DIR_LTR && direction != DIR_RTL)
+			throw new IllegalArgumentException("Invalid direction"); //$NON-NLS-1$
+		if (affixLength < 0 || affixLength > 2)
+			throw new IllegalArgumentException("Invalid affix length"); //$NON-NLS-1$
+		int count = offsets == null ? 0 : offsets.length;
+		if (count == 0 && affixLength == 0)
+			return text;
+		int textLength = text.length();
+		if (textLength == 0)
+			return text;
+		int newLen = textLength + count;
+		if (affixLength == 1)
+			newLen++; /* +1 for a mark char */
+		else if (affixLength == 2)
+			newLen += FIXES_LENGTH;
+		char[] fullChars = new char[newLen];
+		int added = affixLength;
+		// add marks at offsets
+		char curMark = MARKS[direction];
+		for (int i = 0, j = 0; i < textLength; i++) {
+			char c = text.charAt(i);
+			if (j < count && i == offsets[j]) {
+				fullChars[i + added] = curMark;
+				added++;
+				j++;
+			}
+			fullChars[i + added] = c;
+		}
+		if (affixLength > 0) { /* add prefix/suffix ? */
+			if (affixLength == 1) { /* contextual orientation */
+				fullChars[0] = curMark;
+			} else {
+				// When the orientation is RTL, we need to add EMBED at the
+				// start of the text and PDF at its end.
+				// However, because of a bug in Windows' handling of LRE/RLE/PDF,
+				// we add LRM or RLM (according to the direction) after the 
+				// LRE/RLE and again before the PDF.
+				char curEmbed = EMBEDS[direction];
+				fullChars[0] = curEmbed;
+				fullChars[1] = curMark;
+				fullChars[newLen - 1] = PDF;
+				fullChars[newLen - 2] = curMark;
+			}
+		}
+		return new String(fullChars);
+	}
+
+}