Skip to main content
summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to 'org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal')
-rw-r--r--org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/Html2TextReader.java317
-rw-r--r--org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/SingleCharReader.java74
-rw-r--r--org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/SubstitutionTextReader.java173
-rw-r--r--org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/CommonMessages.java4
-rw-r--r--org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/Html2TextReader.java3
-rw-r--r--org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/SingleCharReader.java3
-rw-r--r--org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/SubstitutionTextReader.java3
7 files changed, 577 insertions, 0 deletions
diff --git a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/Html2TextReader.java b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/Html2TextReader.java
new file mode 100644
index 00000000..df3dfaeb
--- /dev/null
+++ b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/Html2TextReader.java
@@ -0,0 +1,317 @@
+/*******************************************************************************
+ * Copyright (c) 2000, 2008 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ *******************************************************************************/
+package org.eclipse.mylyn.internal.commons.core;
+
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Reads the text contents from a reader of HTML contents and translates the tags or cuts them out.
+ * <p>
+ * Only the tags registered in the static tag set are interpreted; every other tag is removed from the output. A tag is
+ * matched by its complete text, so a tag that carries attributes is treated like an unknown tag. Text between
+ * <code>&lt;head&gt;</code> and <code>&lt;/head&gt;</code> is suppressed.
+ * </p>
+ * <p>
+ * Moved into this package from <code>org.eclipse.jface.internal.text.revisions</code>.
+ * </p>
+ * <p>
+ * Based on {@link org.eclipse.jface.internal.text.html.HTML2TextReader}.
+ * </p>
+ */
+public class Html2TextReader extends SubstitutionTextReader {
+
+    private static final String EMPTY_STRING = ""; //$NON-NLS-1$
+
+    /** Maps entity names (without the leading '&amp;' and trailing ';') to their replacement text. */
+    private static final Map<String, String> fgEntityLookup;
+
+    /** Lower-case names of the tags that are interpreted by {@link #html2Text(String)}. */
+    private static final Set<String> fgTags;
+
+    static {
+
+        fgTags = new HashSet<String>();
+        fgTags.add("b"); //$NON-NLS-1$
+        fgTags.add("br"); //$NON-NLS-1$
+        fgTags.add("br/"); //$NON-NLS-1$
+        fgTags.add("div"); //$NON-NLS-1$
+        fgTags.add("h1"); //$NON-NLS-1$
+        fgTags.add("h2"); //$NON-NLS-1$
+        fgTags.add("h3"); //$NON-NLS-1$
+        fgTags.add("h4"); //$NON-NLS-1$
+        fgTags.add("h5"); //$NON-NLS-1$
+        fgTags.add("p"); //$NON-NLS-1$
+        fgTags.add("dl"); //$NON-NLS-1$
+        fgTags.add("dt"); //$NON-NLS-1$
+        fgTags.add("dd"); //$NON-NLS-1$
+        fgTags.add("li"); //$NON-NLS-1$
+        fgTags.add("ul"); //$NON-NLS-1$
+        fgTags.add("pre"); //$NON-NLS-1$
+        fgTags.add("head"); //$NON-NLS-1$
+
+        fgEntityLookup = new HashMap<String, String>(7);
+        fgEntityLookup.put("lt", "<"); //$NON-NLS-1$ //$NON-NLS-2$
+        fgEntityLookup.put("gt", ">"); //$NON-NLS-1$ //$NON-NLS-2$
+        fgEntityLookup.put("nbsp", " "); //$NON-NLS-1$ //$NON-NLS-2$
+        fgEntityLookup.put("amp", "&"); //$NON-NLS-1$ //$NON-NLS-2$
+        fgEntityLookup.put("circ", "^"); //$NON-NLS-1$ //$NON-NLS-2$
+        fgEntityLookup.put("tilde", "~"); //$NON-NLS-1$ //$NON-NLS-2$
+        fgEntityLookup.put("quot", "\""); //$NON-NLS-1$ //$NON-NLS-2$
+    }
+
+    /** Set by <code>&lt;p&gt;</code>, cleared by <code>&lt;/p&gt;</code>; decides what the closing tag emits. */
+    private boolean fInParagraph = false;
+
+    /** <code>true</code> between <code>&lt;pre&gt;</code> and <code>&lt;/pre&gt;</code>. */
+    private boolean fIsPreformattedText = false;
+
+    /** <code>true</code> between <code>&lt;head&gt;</code> and <code>&lt;/head&gt;</code>; plain text is dropped. */
+    private boolean fIgnore = false;
+
+    /**
+     * Transforms the HTML text from the reader to formatted text.
+     *
+     * @param reader
+     *            the reader providing the HTML; it is wrapped in a {@link PushbackReader}
+     */
+    public Html2TextReader(Reader reader) {
+        super(new PushbackReader(reader));
+    }
+
+    /**
+     * Hook invoked when bold text starts (<code>b</code>, a heading or <code>dt</code>). Does nothing by default.
+     */
+    protected void startBold() {
+    }
+
+    /**
+     * Invoked when a <code>pre</code> element is opened.
+     */
+    protected void startPreformattedText() {
+        fIsPreformattedText = true;
+        //setSkipWhitespace(false);
+    }
+
+    /**
+     * Invoked when a <code>pre</code> element is closed.
+     */
+    protected void stopPreformattedText() {
+        fIsPreformattedText = false;
+        //setSkipWhitespace(true);
+    }
+
+    /**
+     * Hook invoked when bold text ends. Does nothing by default.
+     */
+    protected void stopBold() {
+    }
+
+    /**
+     * Computes the replacement for <code>c</code>: tags and entities are translated, text inside
+     * <code>head</code> and line feeds are removed, everything else is passed through.
+     *
+     * @param c
+     *            the character that was read, or <code>-1</code> at the end of the input
+     * @return the replacement text, or <code>null</code> if <code>c</code> is to be used unchanged
+     * @throws IOException
+     *             in case reading further characters fails
+     */
+    @Override
+    protected String computeSubstitution(int c) throws IOException {
+
+        if (c == -1) {
+            // End of input is never substituted. Answering EMPTY_STRING here (which used to happen inside an
+            // unterminated <head>) makes the caller poll the exhausted reader forever.
+            return null;
+        }
+
+        if (c == '<') {
+            return processHTMLTag();
+        } else if (fIgnore) {
+            return EMPTY_STRING;
+        } else if (c == '&') {
+            return processEntity();
+        } else if (fIsPreformattedText) {
+            return processPreformattedText(c);
+        } else if (c == '\n') {
+            return EMPTY_STRING;
+        }
+
+        return null;
+    }
+
+    /**
+     * Translates the text found between '&lt;' and '&gt;' into its plain text replacement and updates the reader
+     * state (bold, paragraph, preformatted, ignore).
+     *
+     * @param html
+     *            the tag text without the angle brackets, may be <code>null</code>
+     * @return the replacement text, never <code>null</code>
+     */
+    private String html2Text(String html) {
+
+        if (html == null || html.length() == 0) {
+            return EMPTY_STRING;
+        }
+
+        html = html.toLowerCase(Locale.ENGLISH);
+
+        // the tag set holds opening names only, so strip the '/' of a closing tag for the lookup
+        String tag = html;
+        if ('/' == tag.charAt(0)) {
+            tag = tag.substring(1);
+        }
+
+        if (!fgTags.contains(tag)) {
+            return EMPTY_STRING;
+        }
+
+        if ("pre".equals(html)) { //$NON-NLS-1$
+            startPreformattedText();
+            return EMPTY_STRING;
+        }
+
+        if ("/pre".equals(html)) { //$NON-NLS-1$
+            stopPreformattedText();
+            return EMPTY_STRING;
+        }
+
+        // inside <pre> all remaining tags are dropped without side effects
+        if (fIsPreformattedText) {
+            return EMPTY_STRING;
+        }
+
+        if ("b".equals(html)) { //$NON-NLS-1$
+            startBold();
+            return EMPTY_STRING;
+        }
+
+        if ((html.length() > 1 && html.charAt(0) == 'h' && Character.isDigit(html.charAt(1))) || "dt".equals(html)) { //$NON-NLS-1$
+            startBold();
+            return EMPTY_STRING;
+        }
+
+        if ("dl".equals(html)) { //$NON-NLS-1$
+            return LINE_DELIM;
+        }
+
+        if ("dd".equals(html)) { //$NON-NLS-1$
+            return "\t"; //$NON-NLS-1$
+        }
+
+        if ("li".equals(html)) { //$NON-NLS-1$
+            return LINE_DELIM + " - "; //$NON-NLS-1$
+        }
+
+        if ("/b".equals(html)) { //$NON-NLS-1$
+            stopBold();
+            return EMPTY_STRING;
+        }
+
+        if ("p".equals(html)) { //$NON-NLS-1$
+            fInParagraph = true;
+            return LINE_DELIM;
+        }
+
+        if ("br".equals(html) || "br/".equals(html) || "div".equals(html)) { //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
+            return LINE_DELIM;
+        }
+
+        if ("/p".equals(html)) { //$NON-NLS-1$
+            // a closing tag without a matching opening tag still separates the text with a line break
+            boolean inParagraph = fInParagraph;
+            fInParagraph = false;
+            return inParagraph ? EMPTY_STRING : LINE_DELIM;
+        }
+
+        if ((html.startsWith("/h") && html.length() > 2 && Character.isDigit(html.charAt(2))) || "/dt".equals(html)) { //$NON-NLS-1$ //$NON-NLS-2$
+            stopBold();
+            return LINE_DELIM;
+        }
+
+        if ("/dd".equals(html)) { //$NON-NLS-1$
+            return LINE_DELIM;
+        }
+
+        if ("head".equals(html)) { //$NON-NLS-1$
+            fIgnore = true;
+            return EMPTY_STRING;
+        }
+
+        if ("/head".equals(html)) { //$NON-NLS-1$
+            fIgnore = false;
+            return EMPTY_STRING;
+        }
+
+        return EMPTY_STRING;
+    }
+
+    /**
+     * A '&lt;' has been read. Reads up to the closing '&gt;' and translates the tag.
+     *
+     * @return the replacement for the tag; the text read so far prefixed with '&lt;' if another '&lt;' shows up
+     *         before the tag is closed; or <code>null</code> if the input ends inside the tag
+     * @throws IOException
+     *             in case reading fails
+     */
+    private String processHTMLTag() throws IOException {
+
+        StringBuilder buf = new StringBuilder();
+        int ch;
+        do {
+
+            ch = nextChar();
+
+            while (ch != -1 && ch != '>') {
+                buf.append(Character.toLowerCase((char) ch));
+                ch = nextChar();
+                if (ch == '"') {
+                    // consume a quoted section as a unit so that a '>' inside it does not end the tag
+                    buf.append(Character.toLowerCase((char) ch));
+                    ch = nextChar();
+                    while (ch != -1 && ch != '"') {
+                        buf.append(Character.toLowerCase((char) ch));
+                        ch = nextChar();
+                    }
+                }
+                if (ch == '<') {
+                    // a new tag starts before this one was closed: hand back what was read as plain text
+                    unread(ch);
+                    return '<' + buf.toString();
+                }
+            }
+
+            if (ch == -1) {
+                return null;
+            }
+
+            int tagLen = buf.length();
+            // needs special treatment for comments
+            if ((tagLen >= 3 && "!--".equals(buf.substring(0, 3))) //$NON-NLS-1$
+                    && !(tagLen >= 5 && "--".equals(buf.substring(tagLen - 2)))) { //$NON-NLS-1$
+                // unfinished comment: the '>' is part of the comment body. The cast matters, appending the int
+                // would add the digits "62" instead of the character.
+                buf.append((char) ch);
+            } else {
+                break;
+            }
+        } while (true);
+
+        return html2Text(buf.toString());
+    }
+
+    /**
+     * Computes the substitution for a character inside a <code>pre</code> element.
+     *
+     * @param c
+     *            the character that was read
+     * @return always <code>null</code>, i.e. the character is passed through unchanged
+     */
+    private String processPreformattedText(int c) {
+        return null;
+    }
+
+    /**
+     * Pushes <code>ch</code> back into the wrapped {@link PushbackReader}.
+     */
+    private void unread(int ch) throws IOException {
+        ((PushbackReader) getReader()).unread(ch);
+    }
+
+    /**
+     * Translates an entity into its text.
+     *
+     * @param symbol
+     *            the entity without the leading '&amp;' and the trailing ';', for example <code>lt</code>,
+     *            <code>#65</code> or <code>#x41</code>
+     * @return the character(s) denoted by the entity, or <code>"&amp;" + symbol</code> if the entity is unknown or
+     *         malformed
+     */
+    protected String entity2Text(String symbol) {
+        if (symbol.length() > 1 && symbol.charAt(0) == '#') {
+            try {
+                char radixMarker = symbol.charAt(1);
+                int codePoint;
+                if (radixMarker == 'x' || radixMarker == 'X') {
+                    codePoint = Integer.parseInt(symbol.substring(2), 16);
+                } else {
+                    codePoint = Integer.parseInt(symbol.substring(1), 10);
+                }
+                // a plain (char) cast would silently truncate code points above U+FFFF
+                if (Character.isValidCodePoint(codePoint)) {
+                    return new String(Character.toChars(codePoint));
+                }
+            } catch (NumberFormatException ignored) {
+                // not a number: fall through and echo the symbol unchanged
+            }
+        } else {
+            String str = fgEntityLookup.get(symbol);
+            if (str != null) {
+                return str;
+            }
+        }
+        return "&" + symbol; // not found //$NON-NLS-1$
+    }
+
+    /**
+     * A '&amp;' has been read. Reads the entity name and translates it.
+     *
+     * @return the translated entity if the name is terminated by ';', otherwise the text that was read, unchanged
+     * @throws IOException
+     *             in case reading fails
+     */
+    private String processEntity() throws IOException {
+        StringBuilder buf = new StringBuilder();
+        int ch = nextChar();
+        while (Character.isLetterOrDigit((char) ch) || ch == '#') {
+            buf.append((char) ch);
+            ch = nextChar();
+        }
+
+        if (ch == ';') {
+            return entity2Text(buf.toString());
+        }
+
+        // not an entity after all: give back everything that was consumed, including the terminating character
+        buf.insert(0, '&');
+        if (ch != -1) {
+            buf.append((char) ch);
+        }
+        return buf.toString();
+    }
+}
diff --git a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/SingleCharReader.java b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/SingleCharReader.java
new file mode 100644
index 00000000..db64249a
--- /dev/null
+++ b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/SingleCharReader.java
@@ -0,0 +1,74 @@
+/*******************************************************************************
+ * Copyright (c) 2000, 2007 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ *******************************************************************************/
+package org.eclipse.mylyn.internal.commons.core;
+
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * A reader whose bulk {@link #read(char[], int, int)} is implemented on top of the single character
+ * {@link #read()}, which subclasses have to provide.
+ * <p>
+ * Moved into this package from <code>org.eclipse.jface.internal.text.revisions</code>.
+ * </p>
+ * <p>
+ * Based on {@link org.eclipse.jface.internal.text.html.SingleCharReader}.
+ * </p>
+ */
+public abstract class SingleCharReader extends Reader {
+
+    /**
+     * @see Reader#read()
+     */
+    @Override
+    public abstract int read() throws IOException;
+
+    /**
+     * Fills <code>cbuf</code> by calling {@link #read()} once per character.
+     *
+     * @param cbuf
+     *            the destination buffer
+     * @param off
+     *            the index of the first character to store
+     * @param len
+     *            the maximum number of characters to read
+     * @return the number of characters read, or <code>-1</code> if the end of the input was reached before any
+     *         character could be read
+     * @throws IOException
+     *             in case reading fails
+     * @throws IndexOutOfBoundsException
+     *             if <code>off</code> or <code>len</code> is negative, or if the range does not fit into
+     *             <code>cbuf</code>
+     * @see Reader#read(char[],int,int)
+     */
+    @Override
+    public int read(char[] cbuf, int off, int len) throws IOException {
+        // Validate before consuming input. A negative len used to be returned as is, which callers take for
+        // end-of-stream; a bad offset only failed after characters had already been swallowed.
+        if (off < 0 || len < 0 || len > cbuf.length - off) {
+            throw new IndexOutOfBoundsException();
+        }
+        for (int count = 0; count < len; count++) {
+            int ch = read();
+            if (ch == -1) {
+                return count == 0 ? -1 : count;
+            }
+            cbuf[off + count] = (char) ch;
+        }
+        return len;
+    }
+
+    /**
+     * @return always <code>true</code>
+     * @see Reader#ready()
+     */
+    @Override
+    public boolean ready() throws IOException {
+        return true;
+    }
+
+    /**
+     * Returns the readable content as string.
+     *
+     * @return the readable content as string
+     * @throws IOException
+     *             in case reading fails
+     */
+    public String getString() throws IOException {
+        StringBuilder buf = new StringBuilder();
+        int ch;
+        while ((ch = read()) != -1) {
+            buf.append((char) ch);
+        }
+        return buf.toString();
+    }
+}
diff --git a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/SubstitutionTextReader.java b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/SubstitutionTextReader.java
new file mode 100644
index 00000000..58ca6815
--- /dev/null
+++ b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/SubstitutionTextReader.java
@@ -0,0 +1,173 @@
+/*******************************************************************************
+ * Copyright (c) 2000, 2007 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ *******************************************************************************/
+package org.eclipse.mylyn.internal.commons.core;
+
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * Reads the text contents from a reader and computes for each character a potential substitution. The substitution may
+ * eat more characters than only the one passed into the computation routine.
+ * <p>
+ * Moved into this package from <code>org.eclipse.jface.internal.text.revisions</code>.
+ * </p>
+ * <p>
+ * Based on {@link org.eclipse.jface.internal.text.html.SubstitutionTextReader}.
+ * </p>
+ */
+public abstract class SubstitutionTextReader extends SingleCharReader {
+
+    protected static final String LINE_DELIM = System.getProperty("line.separator", "\n"); //$NON-NLS-1$ //$NON-NLS-2$
+
+    private final Reader fReader;
+
+    /** Whether the character returned last by {@link #read()} was a blank, '\r' or '\n'. */
+    protected boolean fWasWhiteSpace;
+
+    /** The character that followed a collapsed run of white space, or <code>-1</code> if none is pending. */
+    private int fCharAfterWhiteSpace;
+
+    /**
+     * Tells whether white space characters are skipped.
+     */
+    private boolean fSkipWhiteSpace = true;
+
+    /** Whether the last call to {@link #nextChar()} was served from the substitution buffer. */
+    private boolean fReadFromBuffer;
+
+    /** Holds substitution text that has not been handed out yet. */
+    private final StringBuilder fBuffer;
+
+    /** Read position inside {@link #fBuffer}. */
+    private int fIndex;
+
+    /**
+     * Creates a substitution reader on top of the given reader.
+     *
+     * @param reader
+     *            the reader to read from
+     */
+    protected SubstitutionTextReader(Reader reader) {
+        fReader = reader;
+        fBuffer = new StringBuilder();
+        fIndex = 0;
+        fReadFromBuffer = false;
+        fCharAfterWhiteSpace = -1;
+        fWasWhiteSpace = true;
+    }
+
+    /**
+     * Computes the substitution for the given character and if necessary subsequent characters. Implementation should
+     * use <code>nextChar</code> to read subsequent characters.
+     *
+     * @param c
+     *            the character to be substituted
+     * @return the substitution for <code>c</code>
+     * @throws IOException
+     *             in case computing the substitution fails
+     */
+    protected abstract String computeSubstitution(int c) throws IOException;
+
+    /**
+     * Returns the internal reader.
+     *
+     * @return the internal reader
+     */
+    protected Reader getReader() {
+        return fReader;
+    }
+
+    /**
+     * Returns the next character. Pending substitution text is served first. Otherwise the character comes from the
+     * internal reader; while white space is skipped, a run of white space is collapsed into a single blank, and a run
+     * that is followed by the end of the input is dropped.
+     *
+     * @return the next character, or <code>-1</code> at the end of the input
+     * @throws IOException
+     *             in case reading the character fails
+     */
+    protected int nextChar() throws IOException {
+        fReadFromBuffer = (fBuffer.length() > 0);
+        if (fReadFromBuffer) {
+            char ch = fBuffer.charAt(fIndex++);
+            if (fIndex >= fBuffer.length()) {
+                // buffer fully consumed
+                fBuffer.setLength(0);
+                fIndex = 0;
+            }
+            return ch;
+        }
+
+        int ch = fCharAfterWhiteSpace;
+        if (ch == -1) {
+            ch = fReader.read();
+        }
+        if (fSkipWhiteSpace && Character.isWhitespace((char) ch)) {
+            do {
+                ch = fReader.read();
+            } while (Character.isWhitespace((char) ch));
+            if (ch != -1) {
+                // hand out one blank now and keep the character after the run for the next call
+                fCharAfterWhiteSpace = ch;
+                return ' ';
+            }
+        } else {
+            fCharAfterWhiteSpace = -1;
+        }
+        return ch;
+    }
+
+    /**
+     * Returns the next character of the substituted text. While white space is skipped, a blank that directly follows
+     * a blank, '\r' or '\n' is suppressed.
+     *
+     * @return the next character, or <code>-1</code> at the end of the input
+     * @throws IOException
+     *             in case reading fails
+     * @see Reader#read()
+     */
+    @Override
+    public int read() throws IOException {
+        int c;
+        do {
+
+            c = nextChar();
+            // characters that come out of the substitution buffer are final and not substituted again
+            while (!fReadFromBuffer) {
+                String s = computeSubstitution(c);
+                if (s == null) {
+                    break;
+                }
+                if (s.length() > 0) {
+                    fBuffer.insert(0, s);
+                } else if (c == -1) {
+                    // Nothing was substituted for end-of-input. Asking the exhausted reader again can only yield
+                    // -1 once more, so continuing here would never terminate.
+                    break;
+                }
+                c = nextChar();
+            }
+
+        } while (fSkipWhiteSpace && fWasWhiteSpace && (c == ' '));
+        fWasWhiteSpace = (c == ' ' || c == '\r' || c == '\n');
+        return c;
+    }
+
+    /**
+     * @see Reader#ready()
+     */
+    @Override
+    public boolean ready() throws IOException {
+        return fReader.ready();
+    }
+
+    /**
+     * @see Reader#close()
+     */
+    @Override
+    public void close() throws IOException {
+        fReader.close();
+    }
+
+    /**
+     * Resets the internal reader and discards all pending substitution and white space state.
+     *
+     * @see Reader#reset()
+     */
+    @Override
+    public void reset() throws IOException {
+        fReader.reset();
+        fWasWhiteSpace = true;
+        fCharAfterWhiteSpace = -1;
+        fBuffer.setLength(0);
+        fIndex = 0;
+    }
+
+    /**
+     * Sets whether white space characters are skipped.
+     *
+     * @param state
+     *            <code>true</code> to skip white space, <code>false</code> to pass it through
+     */
+    protected final void setSkipWhitespace(boolean state) {
+        fSkipWhiteSpace = state;
+    }
+
+    /**
+     * Tells whether white space characters are skipped.
+     *
+     * @return <code>true</code> if white space is skipped
+     */
+    protected final boolean isSkippingWhitespace() {
+        return fSkipWhiteSpace;
+    }
+}
diff --git a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/CommonMessages.java b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/CommonMessages.java
index 4652e9b7..782d8ef0 100644
--- a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/CommonMessages.java
+++ b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/CommonMessages.java
@@ -13,6 +13,10 @@ package org.eclipse.mylyn.internal.provisional.commons.core;
import org.eclipse.osgi.util.NLS;
+/**
+ * @deprecated use {@link org.eclipse.mylyn.commons.core.CommonMessages} instead
+ */
+@Deprecated
public class CommonMessages extends NLS {
private static final String BUNDLE_NAME = "org.eclipse.mylyn.internal.provisional.commons.core.messages"; //$NON-NLS-1$
diff --git a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/Html2TextReader.java b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/Html2TextReader.java
index 0023c046..bac08791 100644
--- a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/Html2TextReader.java
+++ b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/Html2TextReader.java
@@ -27,7 +27,10 @@ import java.util.Set;
* <p>
* Based on {@link org.eclipse.jface.internal.text.html.HTML2TextReader}.
* </p>
+ *
+ * @deprecated use {@link org.eclipse.mylyn.internal.commons.core.Html2TextReader} instead
*/
+@Deprecated
@SuppressWarnings({ "rawtypes", "unchecked" })
public class Html2TextReader extends SubstitutionTextReader {
diff --git a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/SingleCharReader.java b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/SingleCharReader.java
index 02f3c314..c24ca9e6 100644
--- a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/SingleCharReader.java
+++ b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/SingleCharReader.java
@@ -20,7 +20,10 @@ import java.io.Reader;
* <p>
* Based on {@link org.eclipse.jface.internal.text.html.SingleCharReader}.
* </p>
+ *
+ * @deprecated use {@link org.eclipse.mylyn.internal.commons.core.SingleCharReader} instead
*/
+@Deprecated
public abstract class SingleCharReader extends Reader {
/**
diff --git a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/SubstitutionTextReader.java b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/SubstitutionTextReader.java
index 6f46e866..a554ec51 100644
--- a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/SubstitutionTextReader.java
+++ b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/SubstitutionTextReader.java
@@ -22,7 +22,10 @@ import java.io.Reader;
* <p>
* Based on {@link org.eclipse.jface.internal.text.html.SubstitutionTextReader}.
* </p>
+ *
+ * @deprecated use {@link org.eclipse.mylyn.internal.commons.core.SubstitutionTextReader} instead
*/
+@Deprecated
public abstract class SubstitutionTextReader extends SingleCharReader {
protected static final String LINE_DELIM = System.getProperty("line.separator", "\n"); //$NON-NLS-1$ //$NON-NLS-2$

Back to the top