diff options
Diffstat (limited to 'org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal')
7 files changed, 577 insertions, 0 deletions
diff --git a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/Html2TextReader.java b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/Html2TextReader.java
new file mode 100644
index 00000000..df3dfaeb
--- /dev/null
+++ b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/Html2TextReader.java
@@ -0,0 +1,317 @@
+/*******************************************************************************
+ * Copyright (c) 2000, 2008 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *     IBM Corporation - initial API and implementation
+ *******************************************************************************/
+package org.eclipse.mylyn.internal.commons.core;
+
+import java.io.IOException;
+import java.io.PushbackReader;
+import java.io.Reader;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Locale;
+import java.util.Map;
+import java.util.Set;
+
+/**
+ * Reads the text contents from a reader of HTML contents and translates the tags or cuts them out.
+ * <p>
+ * A fixed set of block-level tags is turned into line breaks, tabs or list bullets, the content of
+ * <code>&lt;head&gt;</code> is discarded, character entities are decoded and every other tag is removed.
+ * </p>
+ * <p>
+ * Moved into this package from <code>org.eclipse.jface.internal.text.revisions</code>.
+ * </p>
+ * <p>
+ * Based on {@link org.eclipse.jface.internal.text.html.HTML2TextReader}.
+ * </p>
+ */
+public class Html2TextReader extends SubstitutionTextReader {
+
+    private static final String EMPTY_STRING = ""; //$NON-NLS-1$
+
+    /** Named character entities (without <code>&amp;</code> and <code>;</code>) mapped to their replacement text. */
+    private static final Map<String, String> fgEntityLookup;
+
+    /** Lower-case names of the tags that are translated; any tag not listed here is dropped. */
+    private static final Set<String> fgTags;
+
+    static {
+
+        fgTags = new HashSet<String>();
+        fgTags.add("b"); //$NON-NLS-1$
+        fgTags.add("br"); //$NON-NLS-1$
+        fgTags.add("br/"); //$NON-NLS-1$
+        fgTags.add("div"); //$NON-NLS-1$
+        fgTags.add("h1"); //$NON-NLS-1$
+        fgTags.add("h2"); //$NON-NLS-1$
+        fgTags.add("h3"); //$NON-NLS-1$
+        fgTags.add("h4"); //$NON-NLS-1$
+        fgTags.add("h5"); //$NON-NLS-1$
+        fgTags.add("p"); //$NON-NLS-1$
+        fgTags.add("dl"); //$NON-NLS-1$
+        fgTags.add("dt"); //$NON-NLS-1$
+        fgTags.add("dd"); //$NON-NLS-1$
+        fgTags.add("li"); //$NON-NLS-1$
+        fgTags.add("ul"); //$NON-NLS-1$
+        fgTags.add("pre"); //$NON-NLS-1$
+        fgTags.add("head"); //$NON-NLS-1$
+
+        fgEntityLookup = new HashMap<String, String>();
+        fgEntityLookup.put("lt", "<"); //$NON-NLS-1$ //$NON-NLS-2$
+        fgEntityLookup.put("gt", ">"); //$NON-NLS-1$ //$NON-NLS-2$
+        fgEntityLookup.put("nbsp", " "); //$NON-NLS-1$ //$NON-NLS-2$
+        fgEntityLookup.put("amp", "&"); //$NON-NLS-1$ //$NON-NLS-2$
+        fgEntityLookup.put("circ", "^"); //$NON-NLS-1$ //$NON-NLS-2$
+        fgEntityLookup.put("tilde", "~"); //$NON-NLS-1$ //$NON-NLS-2$
+        fgEntityLookup.put("quot", "\""); //$NON-NLS-1$ //$NON-NLS-2$
+    }
+
+    /** Set by <code>&lt;p&gt;</code>, cleared by <code>&lt;/p&gt;</code>; decides whether the closing tag emits a line break. */
+    private boolean fInParagraph = false;
+
+    /** <code>true</code> between <code>&lt;pre&gt;</code> and <code>&lt;/pre&gt;</code>; other known tags are dropped meanwhile. */
+    private boolean fIsPreformattedText = false;
+
+    /** <code>true</code> between <code>&lt;head&gt;</code> and <code>&lt;/head&gt;</code>; plain characters are discarded meanwhile. */
+    private boolean fIgnore = false;
+
+    /**
+     * Transforms the HTML text from the reader to formatted text.
+     *
+     * @param reader
+     *            the reader providing the HTML; it is wrapped in a {@link PushbackReader} so that a
+     *            prematurely read <code>&lt;</code> can be handed back
+     */
+    public Html2TextReader(Reader reader) {
+        super(new PushbackReader(reader));
+    }
+
+    /**
+     * Hook called when <code>&lt;b&gt;</code>, a heading or <code>&lt;dt&gt;</code> opens. Does nothing here.
+     */
+    protected void startBold() {
+    }
+
+    /**
+     * Marks the start of a <code>&lt;pre&gt;</code> section.
+     */
+    protected void startPreformattedText() {
+        fIsPreformattedText = true;
+        // NOTE(review): setSkipWhitespace(false) is disabled, so whitespace inside <pre> is still
+        // collapsed by the superclass - confirm that this is intended.
+        //setSkipWhitespace(false);
+    }
+
+    /**
+     * Marks the end of a <code>&lt;pre&gt;</code> section.
+     */
+    protected void stopPreformattedText() {
+        fIsPreformattedText = false;
+        //setSkipWhitespace(true);
+    }
+
+    /**
+     * Hook called when <code>&lt;/b&gt;</code>, a closing heading or <code>&lt;/dt&gt;</code> is seen. Does nothing here.
+     */
+    protected void stopBold() {
+    }
+
+    /**
+     * Computes the replacement for <code>c</code>: tags and entities are consumed and translated, characters
+     * inside <code>&lt;head&gt;</code> and line feeds are removed, everything else is passed through.
+     *
+     * @param c
+     *            the character just read, or <code>-1</code> at end of input
+     * @return the substitution text, or <code>null</code> if <code>c</code> is to be used unchanged
+     * @throws IOException
+     *             in case reading further characters fails
+     */
+    @Override
+    protected String computeSubstitution(int c) throws IOException {
+
+        if (c == -1) {
+            // End of input is never substituted. In particular it must not yield EMPTY_STRING while
+            // fIgnore is set (unterminated <head>): an empty, non-null substitution tells the caller
+            // to fetch another character, and at end of input that would never terminate.
+            return null;
+        }
+
+        if (c == '<') {
+            return processHTMLTag();
+        } else if (fIgnore) {
+            return EMPTY_STRING;
+        } else if (c == '&') {
+            return processEntity();
+        } else if (fIsPreformattedText) {
+            return processPreformattedText(c);
+        } else if (c == '\n') {
+            return EMPTY_STRING;
+        }
+
+        return null;
+    }
+
+    /**
+     * Translates the content of one tag (the text between <code>&lt;</code> and <code>&gt;</code>) into
+     * plain text and updates the paragraph, preformatted and ignore state.
+     *
+     * @param html
+     *            the tag content, for example <code>"p"</code> or <code>"/h1"</code>; tags carrying
+     *            attributes are not recognized and yield the empty string
+     * @return the replacement text, never <code>null</code>
+     */
+    private String html2Text(String html) {
+
+        if (html == null || html.length() == 0) {
+            return EMPTY_STRING;
+        }
+
+        html = html.toLowerCase(Locale.ENGLISH);
+
+        // look up the tag name without the leading slash of a closing tag
+        String tag = html;
+        if ('/' == tag.charAt(0)) {
+            tag = tag.substring(1);
+        }
+
+        if (!fgTags.contains(tag)) {
+            return EMPTY_STRING;
+        }
+
+        if ("pre".equals(html)) { //$NON-NLS-1$
+            startPreformattedText();
+            return EMPTY_STRING;
+        }
+
+        if ("/pre".equals(html)) { //$NON-NLS-1$
+            stopPreformattedText();
+            return EMPTY_STRING;
+        }
+
+        // inside <pre> all remaining known tags are dropped without side effects
+        if (fIsPreformattedText) {
+            return EMPTY_STRING;
+        }
+
+        if ("b".equals(html)) { //$NON-NLS-1$
+            startBold();
+            return EMPTY_STRING;
+        }
+
+        if ((html.length() > 1 && html.charAt(0) == 'h' && Character.isDigit(html.charAt(1))) || "dt".equals(html)) { //$NON-NLS-1$
+            startBold();
+            return EMPTY_STRING;
+        }
+
+        if ("dl".equals(html)) { //$NON-NLS-1$
+            return LINE_DELIM;
+        }
+
+        if ("dd".equals(html)) { //$NON-NLS-1$
+            return "\t"; //$NON-NLS-1$
+        }
+
+        if ("li".equals(html)) { //$NON-NLS-1$
+            return LINE_DELIM + " - "; //$NON-NLS-1$
+        }
+
+        if ("/b".equals(html)) { //$NON-NLS-1$
+            stopBold();
+            return EMPTY_STRING;
+        }
+
+        if ("p".equals(html)) { //$NON-NLS-1$
+            fInParagraph = true;
+            return LINE_DELIM;
+        }
+
+        if ("br".equals(html) || "br/".equals(html) || "div".equals(html)) { //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
+            return LINE_DELIM;
+        }
+
+        if ("/p".equals(html)) { //$NON-NLS-1$
+            // a closing tag without a matching <p> still separates the text with a line break
+            boolean inParagraph = fInParagraph;
+            fInParagraph = false;
+            return inParagraph ? EMPTY_STRING : LINE_DELIM;
+        }
+
+        if ((html.startsWith("/h") && html.length() > 2 && Character.isDigit(html.charAt(2))) || "/dt".equals(html)) { //$NON-NLS-1$ //$NON-NLS-2$
+            stopBold();
+            return LINE_DELIM;
+        }
+
+        if ("/dd".equals(html)) { //$NON-NLS-1$
+            return LINE_DELIM;
+        }
+
+        if ("head".equals(html)) { //$NON-NLS-1$
+            fIgnore = true;
+            return EMPTY_STRING;
+        }
+
+        if ("/head".equals(html)) { //$NON-NLS-1$
+            fIgnore = false;
+            return EMPTY_STRING;
+        }
+
+        return EMPTY_STRING;
+    }
+
+    /*
+     * A '<' has been read. Process a html tag.
+     *
+     * Returns the translated tag, the raw text read so far if another '<' shows up before the tag is
+     * closed, or null if the input ends inside the tag.
+     */
+    private String processHTMLTag() throws IOException {
+
+        StringBuilder buf = new StringBuilder();
+        int ch;
+        do {
+
+            ch = nextChar();
+
+            while (ch != -1 && ch != '>') {
+                buf.append(Character.toLowerCase((char) ch));
+                ch = nextChar();
+                if (ch == '"') {
+                    // copy a quoted attribute value verbatim so that a '>' inside it does not end the tag
+                    buf.append(Character.toLowerCase((char) ch));
+                    ch = nextChar();
+                    while (ch != -1 && ch != '"') {
+                        buf.append(Character.toLowerCase((char) ch));
+                        ch = nextChar();
+                    }
+                }
+                if (ch == '<') {
+                    // not a tag after all: hand the new '<' back and emit what was read as text
+                    unread(ch);
+                    return '<' + buf.toString();
+                }
+            }
+
+            if (ch == -1) {
+                return null;
+            }
+
+            int tagLen = buf.length();
+            // needs special treatment for comments
+            if ((tagLen >= 3 && "!--".equals(buf.substring(0, 3))) //$NON-NLS-1$
+                    && !(tagLen >= 5 && "--".equals(buf.substring(tagLen - 2)))) { //$NON-NLS-1$
+                // unfinished comment: this '>' belongs to the comment text. Append it as a character;
+                // appending the int would add its decimal digits ("62") instead.
+                buf.append((char) ch);
+            } else {
+                break;
+            }
+        } while (true);
+
+        return html2Text(buf.toString());
+    }
+
+    /*
+     * Characters inside <pre> are passed through unchanged (null means "no substitution").
+     */
+    private String processPreformattedText(int c) {
+        return null;
+    }
+
+    /*
+     * Pushes a character back onto the PushbackReader created in the constructor.
+     */
+    private void unread(int ch) throws IOException {
+        ((PushbackReader) getReader()).unread(ch);
+    }
+
+    /**
+     * Translates an entity name into the text it stands for.
+     *
+     * @param symbol
+     *            the entity without the leading <code>&amp;</code> and the trailing <code>;</code>, for
+     *            example <code>"amp"</code>, <code>"#65"</code> or <code>"#x41"</code>
+     * @return the replacement text, or <code>"&amp;" + symbol</code> if the entity is unknown or is not a
+     *         valid numeric character reference
+     */
+    protected String entity2Text(String symbol) {
+        if (symbol.length() > 1 && symbol.charAt(0) == '#') {
+            try {
+                int codePoint;
+                char radixMarker = symbol.charAt(1);
+                if (radixMarker == 'x' || radixMarker == 'X') {
+                    codePoint = Integer.parseInt(symbol.substring(2), 16);
+                } else {
+                    codePoint = Integer.parseInt(symbol.substring(1), 10);
+                }
+                // toChars yields a surrogate pair above U+FFFF where a plain (char) cast would truncate
+                if (Character.isValidCodePoint(codePoint)) {
+                    return new String(Character.toChars(codePoint));
+                }
+            } catch (NumberFormatException e) {
+                // not a number: fall through and return the entity unresolved
+            }
+        } else {
+            String str = fgEntityLookup.get(symbol);
+            if (str != null) {
+                return str;
+            }
+        }
+        return "&" + symbol; // not found //$NON-NLS-1$
+    }
+
+    /*
+     * A '&' has been read. Process an entity.
+     *
+     * If no ';' terminates the name, the characters read are returned as literal text.
+     */
+    private String processEntity() throws IOException {
+        StringBuilder buf = new StringBuilder();
+        int ch = nextChar();
+        while (Character.isLetterOrDigit((char) ch) || ch == '#') {
+            buf.append((char) ch);
+            ch = nextChar();
+        }
+
+        if (ch == ';') {
+            return entity2Text(buf.toString());
+        }
+
+        buf.insert(0, '&');
+        if (ch == '<') {
+            // a tag starts right after the malformed entity (e.g. "R&D<br>"): hand the '<' back so the
+            // tag is still translated instead of being emitted as text
+            unread(ch);
+        } else if (ch != -1) {
+            buf.append((char) ch);
+        }
+        return buf.toString();
+    }
+}
diff --git a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/SingleCharReader.java b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/SingleCharReader.java
new file mode 100644
index 00000000..db64249a
--- /dev/null
+++ b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/SingleCharReader.java
@@ -0,0 +1,74 @@
+/*******************************************************************************
+ * Copyright (c) 2000, 2007 IBM Corporation and others.
+ * All rights reserved.
This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *     IBM Corporation - initial API and implementation
+ *******************************************************************************/
+package org.eclipse.mylyn.internal.commons.core;
+
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * A {@link Reader} whose bulk read is implemented on top of the single-character {@link #read()}, which
+ * subclasses have to provide.
+ * <p>
+ * Moved into this package from <code>org.eclipse.jface.internal.text.revisions</code>.
+ * </p>
+ * <p>
+ * Based on {@link org.eclipse.jface.internal.text.html.SingleCharReader}.
+ * </p>
+ */
+public abstract class SingleCharReader extends Reader {
+
+    /**
+     * @see Reader#read()
+     */
+    @Override
+    public abstract int read() throws IOException;
+
+    /**
+     * Fills the given range of <code>cbuf</code> by calling {@link #read()} once per character.
+     *
+     * @param cbuf
+     *            the destination buffer
+     * @param off
+     *            the index of the first character to store
+     * @param len
+     *            the maximum number of characters to read
+     * @return the number of characters stored, or <code>-1</code> if the end of the input was reached
+     *         before any character could be stored
+     * @throws IOException
+     *             in case reading fails
+     * @see Reader#read(char[],int,int)
+     */
+    @Override
+    public int read(char[] cbuf, int off, int len) throws IOException {
+        int end = off + len;
+        for (int i = off; i < end; i++) {
+            int ch = read();
+            if (ch == -1) {
+                // end of input: report -1 only if nothing was stored in this call
+                if (i == off) {
+                    return -1;
+                }
+                return i - off;
+            }
+            cbuf[i] = (char) ch;
+        }
+        return len;
+    }
+
+    /**
+     * Always reports <code>true</code>.
+     *
+     * @see Reader#ready()
+     */
+    @Override
+    public boolean ready() throws IOException {
+        return true;
+    }
+
+    /**
+     * Returns the readable content as string. The reader is consumed up to its end.
+     *
+     * @return the readable content as string
+     * @exception IOException
+     *                in case reading fails
+     */
+    public String getString() throws IOException {
+        // method-local accumulator: the unsynchronized StringBuilder is sufficient
+        StringBuilder buf = new StringBuilder();
+        int ch;
+        while ((ch = read()) != -1) {
+            buf.append((char) ch);
+        }
+        return buf.toString();
+    }
+}
diff --git a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/SubstitutionTextReader.java b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/SubstitutionTextReader.java
new file mode 100644
index 00000000..58ca6815
--- /dev/null
+++ b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/commons/core/SubstitutionTextReader.java
@@ -0,0 +1,173 @@
+/*******************************************************************************
+ * Copyright (c) 2000, 2007 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ *     IBM Corporation - initial API and implementation
+ *******************************************************************************/
+package org.eclipse.mylyn.internal.commons.core;
+
+import java.io.IOException;
+import java.io.Reader;
+
+/**
+ * Reads the text contents from a reader and computes for each character a potential substitution. The substitution may
+ * eat more characters than only the one passed into the computation routine.
+ * <p>
+ * Moved into this package from <code>org.eclipse.jface.internal.text.revisions</code>.
+ * </p>
+ * <p>
+ * Based on {@link org.eclipse.jface.internal.text.html.SubstitutionTextReader}.
+ * </p>
+ */
+public abstract class SubstitutionTextReader extends SingleCharReader {
+
+    protected static final String LINE_DELIM = System.getProperty("line.separator", "\n"); //$NON-NLS-1$ //$NON-NLS-2$
+
+    private final Reader fReader;
+
+    /** Whether the character most recently returned by {@link #read()} was a space, CR or LF. */
+    protected boolean fWasWhiteSpace;
+
+    /** First non-whitespace character read while collapsing a whitespace run, or -1 if none is pending. */
+    private int fCharAfterWhiteSpace;
+
+    /**
+     * Tells whether white space characters are skipped.
+     */
+    private boolean fSkipWhiteSpace = true;
+
+    /** Whether the last {@link #nextChar()} call took its character from {@link #fBuffer}. */
+    private boolean fReadFromBuffer;
+
+    /** Pending substitution text; it is handed out before the underlying reader is consulted again. */
+    private final StringBuilder fBuffer;
+
+    /** Read position inside {@link #fBuffer}. */
+    private int fIndex;
+
+    /**
+     * Creates a substitution reader on top of the given reader.
+     *
+     * @param reader
+     *            the reader to take the raw characters from
+     */
+    protected SubstitutionTextReader(Reader reader) {
+        fReader = reader;
+        fBuffer = new StringBuilder();
+        fIndex = 0;
+        fReadFromBuffer = false;
+        fCharAfterWhiteSpace = -1;
+        // start as if white space had just been returned, so that leading spaces are dropped
+        fWasWhiteSpace = true;
+    }
+
+    /**
+     * Computes the substitution for the given character and if necessary subsequent characters. Implementation should
+     * use <code>nextChar</code> to read subsequent characters.
+     *
+     * @param c
+     *            the character to be substituted
+     * @return the substitution for <code>c</code>, or <code>null</code> if <code>c</code> is to be used as is; an
+     *         empty string removes <code>c</code>
+     * @throws IOException
+     *             in case computing the substitution fails
+     */
+    protected abstract String computeSubstitution(int c) throws IOException;
+
+    /**
+     * Returns the internal reader.
+     *
+     * @return the internal reader
+     */
+    protected Reader getReader() {
+        return fReader;
+    }
+
+    /**
+     * Returns the next character. Buffered substitution text is served first. Otherwise a character is taken from
+     * the internal reader and, while white space is skipped, a run of white space is collapsed into one space.
+     *
+     * @return the next character, or <code>-1</code> at the end of the input
+     * @throws IOException
+     *             in case reading the character fails
+     */
+    protected int nextChar() throws IOException {
+        fReadFromBuffer = (fBuffer.length() > 0);
+        if (fReadFromBuffer) {
+            char ch = fBuffer.charAt(fIndex++);
+            if (fIndex >= fBuffer.length()) {
+                // buffer exhausted: empty it so that the next call reads from the reader again
+                fBuffer.setLength(0);
+                fIndex = 0;
+            }
+            return ch;
+        }
+
+        // a character that ended a previous white space run is delivered before reading on
+        int ch = fCharAfterWhiteSpace;
+        if (ch == -1) {
+            ch = fReader.read();
+        }
+        if (fSkipWhiteSpace && Character.isWhitespace((char) ch)) {
+            do {
+                ch = fReader.read();
+            } while (Character.isWhitespace((char) ch));
+            if (ch != -1) {
+                // remember the character that ended the run and report the run as one space
+                fCharAfterWhiteSpace = ch;
+                return ' ';
+            }
+            // white space at the very end of the input is dropped and -1 is returned below
+        } else {
+            fCharAfterWhiteSpace = -1;
+        }
+        return ch;
+    }
+
+    /**
+     * Returns the next character of the substituted text. Spaces that follow a space or a line break are skipped
+     * while white space skipping is enabled.
+     *
+     * @see Reader#read()
+     */
+    @Override
+    public int read() throws IOException {
+        int c;
+        do {
+
+            c = nextChar();
+            // only characters coming from the underlying reader are substituted, never buffered text
+            while (!fReadFromBuffer) {
+                String s = computeSubstitution(c);
+                if (s == null) {
+                    break;
+                }
+                if (s.length() > 0) {
+                    fBuffer.insert(0, s);
+                } else if (c == -1) {
+                    // An empty substitution at end of input must end the loop: nextChar() would keep
+                    // returning -1 and the same empty substitution would be computed forever.
+                    break;
+                }
+                c = nextChar();
+            }
+
+        } while (fSkipWhiteSpace && fWasWhiteSpace && (c == ' '));
+        fWasWhiteSpace = (c == ' ' || c == '\r' || c == '\n');
+        return c;
+    }
+
+    /**
+     * @see Reader#ready()
+     */
+    @Override
+    public boolean ready() throws IOException {
+        return fReader.ready();
+    }
+
+    /**
+     * @see Reader#close()
+     */
+    @Override
+    public void close() throws IOException {
+        fReader.close();
+    }
+
+    /**
+     * Resets the internal reader and discards the buffered substitution text and the white space state.
+     *
+     * @see Reader#reset()
+     */
+    @Override
+    public void reset() throws IOException {
+        fReader.reset();
+        fWasWhiteSpace = true;
+        fCharAfterWhiteSpace = -1;
+        fBuffer.setLength(0);
+        fIndex = 0;
+    }
+
+    /**
+     * Enables or disables the skipping of white space.
+     *
+     * @param state
+     *            <code>true</code> to skip white space, <code>false</code> to pass it through
+     */
+    protected final void setSkipWhitespace(boolean state) {
+        fSkipWhiteSpace = state;
+    }
+
+    /**
+     * Tells whether white space is currently skipped.
+     *
+     * @return <code>true</code> if white space is skipped
+     */
+    protected final boolean isSkippingWhitespace() {
+        return fSkipWhiteSpace;
+    }
+}
diff --git a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/CommonMessages.java b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/CommonMessages.java
index
4652e9b7..782d8ef0 100644 --- a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/CommonMessages.java +++ b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/CommonMessages.java @@ -13,6 +13,10 @@ package org.eclipse.mylyn.internal.provisional.commons.core; import org.eclipse.osgi.util.NLS; +/** + * @deprecated use {@link org.eclipse.mylyn.commons.core.CommonMessages} instead + */ +@Deprecated public class CommonMessages extends NLS { private static final String BUNDLE_NAME = "org.eclipse.mylyn.internal.provisional.commons.core.messages"; //$NON-NLS-1$ diff --git a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/Html2TextReader.java b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/Html2TextReader.java index 0023c046..bac08791 100644 --- a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/Html2TextReader.java +++ b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/Html2TextReader.java @@ -27,7 +27,10 @@ import java.util.Set; * <p> * Based on {@link org.eclipse.jface.internal.text.html.HTML2TextReader}. 
* </p> + * + * @deprecated use {@link org.eclipse.mylyn.internal.commons.core.Html2TextReader} instead */ +@Deprecated @SuppressWarnings({ "rawtypes", "unchecked" }) public class Html2TextReader extends SubstitutionTextReader { diff --git a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/SingleCharReader.java b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/SingleCharReader.java index 02f3c314..c24ca9e6 100644 --- a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/SingleCharReader.java +++ b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/SingleCharReader.java @@ -20,7 +20,10 @@ import java.io.Reader; * <p> * Based on {@link org.eclipse.jface.internal.text.html.SingleCharReader}. * </p> + * + * @deprecated use {@link org.eclipse.mylyn.internal.commons.core.SingleCharReader} instead */ +@Deprecated public abstract class SingleCharReader extends Reader { /** diff --git a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/SubstitutionTextReader.java b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/SubstitutionTextReader.java index 6f46e866..a554ec51 100644 --- a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/SubstitutionTextReader.java +++ b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/internal/provisional/commons/core/SubstitutionTextReader.java @@ -22,7 +22,10 @@ import java.io.Reader; * <p> * Based on {@link org.eclipse.jface.internal.text.html.SubstitutionTextReader}. * </p> + * + * @deprecated use {@link org.eclipse.mylyn.internal.commons.core.SubstitutionTextReader} instead */ +@Deprecated public abstract class SubstitutionTextReader extends SingleCharReader { protected static final String LINE_DELIM = System.getProperty("line.separator", "\n"); //$NON-NLS-1$ //$NON-NLS-2$ |