| author | Steffen Pingel | 2011-12-27 07:46:11 (EST) |
|---|---|---|
| committer | Steffen Pingel | 2011-12-27 07:46:11 (EST) |
| commit | f35d3f54054b639f04c1ef8734dc499646375c8d (patch) (side-by-side diff) | |
| tree | 00659c61032349617210405dbdbc00f4e280e3a5 | |
| parent | 985b7edd53996894d750e1d31a92d64a3a34d711 (diff) | |
| download | org.eclipse.mylyn.commons-f35d3f54054b639f04c1ef8734dc499646375c8d.zip org.eclipse.mylyn.commons-f35d3f54054b639f04c1ef8734dc499646375c8d.tar.gz org.eclipse.mylyn.commons-f35d3f54054b639f04c1ef8734dc499646375c8d.tar.bz2 | |
NEW - bug 367573: [api] provide API for parsing HTML streams in
o.e.m.commons.core
https://bugs.eclipse.org/bugs/show_bug.cgi?id=367573
Change-Id: I8ebcc689787daea66f3a5c98389d510a3aa2ffae
6 files changed, 1544 insertions, 0 deletions
diff --git a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/commons/core/HtmlStreamTokenizer.java b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/commons/core/HtmlStreamTokenizer.java new file mode 100644 index 0000000..ccbf789 --- a/dev/null +++ b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/commons/core/HtmlStreamTokenizer.java @@ -0,0 +1,1141 @@ +/******************************************************************************* + * Copyright (c) 2004, 2008 Tasktop Technologies and others. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * Contributors: + * Tasktop Technologies - initial API and implementation + *******************************************************************************/ + +package org.eclipse.mylyn.commons.core; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.Reader; +import java.net.URL; +import java.text.ParseException; +import java.util.HashMap; +import java.util.Locale; + +/** + * Parses HTML into tokens. 
+ * + * @author Shawn Minto + * @since 3.7 + */ +public class HtmlStreamTokenizer { + + /** parser state */ + private State state; + + /** reader from which to parse the text */ + private final BufferedReader in; + + /** base URL for resolving relative URLs */ + private final URL base; + + /** buffer holding the text of the current token */ + private final StringBuffer textBuffer; + + /** buffer holding whitespace preceding the current token */ + private final StringBuffer whitespaceBuffer; + + /** + * holds a token that was read and then put back in the queue to be returned again on <code>nextToken</code> call + */ + private Token pushbackToken; + + /** + * holds a character that was read and then determined not to be part of the current token + */ + private int pushbackChar; + + /** current quote delimiter (single or double) */ + private int quoteChar; + + /** Allow class client to choose if tag attributes are escaped or not */ + private boolean escapeTagValues; + + /** + * Constructor. + * + * @param in + * reader for the HTML document to tokenize + * @param base + * URL for resolving relative URLs + */ + public HtmlStreamTokenizer(Reader in, URL base) { + textBuffer = new StringBuffer(); + whitespaceBuffer = new StringBuffer(); + pushbackChar = 0; + state = State.TEXT; + this.in = new BufferedReader(in); + this.base = base; + escapeTagValues = true; + } + + public void escapeTagAttributes(boolean value) { + escapeTagValues = value; + } + + /** + * Returns the next token from the stream. 
+ */ + public Token nextToken() throws IOException, ParseException { + if (pushbackToken != null) { + Token token = pushbackToken; + pushbackToken = null; + return token; + } + + int closingComment = 0; + + textBuffer.setLength(0); + whitespaceBuffer.setLength(0); + do { + int ch; + if (pushbackChar != 0) { + ch = pushbackChar; + pushbackChar = 0; + } else { + ch = in.read(); + } + if (ch < 0) { + State oldState = state; + state = State.EOF; + if (textBuffer.length() > 0 && oldState == State.TEXT) { + return new Token(textBuffer, whitespaceBuffer, false); + } else { + return new Token(); + } + } + if (state == State.TEXT) { + if (ch == '<') { + state = State.TAG; + if (textBuffer.length() > 0) { + return new Token(textBuffer, whitespaceBuffer, false); + } + } else if (Character.isWhitespace((char) ch)) { + pushbackChar = ch; + state = State.WS; + if (textBuffer.length() > 0) { + return new Token(textBuffer, whitespaceBuffer, false); + } + } else { + textBuffer.append((char) ch); + } + } else if (state == State.WS) { + if (!Character.isWhitespace((char) ch)) { + pushbackChar = ch; + state = State.TEXT; + } else { + whitespaceBuffer.append((char) ch); + } + } else if (state == State.TAG) { + if (ch == '>') { + state = State.TEXT; + HtmlTag tag = new HtmlTag(base); + parseTag(textBuffer.toString(), tag, escapeTagValues); + return new Token(tag, whitespaceBuffer); + } + if (ch == '<' && textBuffer.length() == 0) { + textBuffer.append("<<"); //$NON-NLS-1$ + state = State.TEXT; + } else if (ch == '-' && textBuffer.length() == 2 && textBuffer.charAt(1) == '-' + && textBuffer.charAt(0) == '!') { + textBuffer.setLength(0); + state = State.COMMENT; + } else if (ch == '\'' || ch == '"') { + quoteChar = ch; + textBuffer.append((char) ch); + state = State.TAG_QUOTE; + } else { + textBuffer.append((char) ch); + } + } else if (state == State.TAG_QUOTE) { + if (ch == '>') { + pushbackChar = ch; + state = State.TAG; + } else { + textBuffer.append((char) ch); + if (ch == quoteChar) 
{ + state = State.TAG; + } + } + } else if (state == State.COMMENT) { + if (ch == '>' && closingComment >= 2) { + textBuffer.setLength(textBuffer.length() - 2); + closingComment = 0; + state = State.TEXT; + return new Token(textBuffer, whitespaceBuffer, true); + } + if (ch == '-') { + closingComment++; + } else { + closingComment = 0; + } + textBuffer.append((char) ch); + } + } while (true); + } + + /** + * Pushes the token back into the queue, to be returned by the subsequent call to <code>nextToken</code> + */ + public void pushback(Token token) { + pushbackToken = token; + } + + /** + * Parses an HTML tag out of a string of characters. + */ + private static void parseTag(String s, HtmlTag tag, boolean escapeValues) throws ParseException { + + int i = 0; + for (; i < s.length() && Character.isWhitespace(s.charAt(i)); i++) { + // just move forward + } + if (i == s.length()) { + throw new ParseException("parse empty tag", 0); //$NON-NLS-1$ + } + + int start = i; + for (; i < s.length() && !Character.isWhitespace(s.charAt(i)); i++) { + // just move forward + } + tag.setTagName(s.substring(start, i)); + + for (; i < s.length() && Character.isWhitespace(s.charAt(i)); i++) { + // just move forward + } + if (i == s.length()) { + return; + } else { + parseAttributes(tag, s, i, escapeValues); + return; + } + } + + /** + * parses HTML tag attributes from a buffer and sets them in an HtmlTag + */ + private static void parseAttributes(HtmlTag tag, String s, int i, boolean escapeValues) throws ParseException { + while (i < s.length()) { + // skip whitespace + while (i < s.length() && Character.isWhitespace(s.charAt(i))) { + i++; + } + + if (i == s.length()) { + return; + } + + // read the attribute name -- the rule might be looser than the RFC + // specifies: + // everything up to a space or an equal sign is included + int start = i; + for (; i < s.length() && !Character.isWhitespace(s.charAt(i)) && s.charAt(i) != '='; i++) { + // just move forward + } + String attributeName 
= s.substring(start, i).toLowerCase(Locale.ENGLISH); + + if (attributeName.equals("/")) { //$NON-NLS-1$ + tag.setSelfTerminating(true); + continue; + } + + for (; i < s.length() && Character.isWhitespace(s.charAt(i)); i++) { + // just move forward + } + if (i == s.length() || s.charAt(i) != '=') { + // no attribute value + tag.setAttribute(attributeName, ""); //$NON-NLS-1$ + continue; + } + + // skip whitespace to the start of attribute value + for (i = i + 1; i < s.length() && Character.isWhitespace(s.charAt(i)); i++) { + // just move forward + } + if (i == s.length()) { + return; + } + + // read the attribute value -- the rule for unquoted attribute value + // is + // looser than the one in Conolly's W3C 1996 lexical analyzer draft: + // everything + // is included up to the next space + String attributeValue; + if (s.charAt(i) == '"') { + start = ++i; + for (; i < s.length() && s.charAt(i) != '"'; i++) { + // just move forward + } + if (i == s.length()) { + return; // shouldn't happen if input returned by nextToken + } + if (escapeValues) { + attributeValue = unescape(s.substring(start, i)); + } else { + attributeValue = s.substring(start, i); + } + i++; + } else if (s.charAt(i) == '\'') { + start = ++i; + for (; i < s.length() && s.charAt(i) != '\''; i++) { + // just move forward + } + if (i == s.length()) { + return; // shouldn't happen if input returned by nextToken + } + attributeValue = unescape(s.substring(start, i)); + i++; + } else { + start = i; + for (; i < s.length() && !Character.isWhitespace(s.charAt(i)); i++) { + // just move forward + } + attributeValue = s.substring(start, i); + } + tag.setAttribute(attributeName, attributeValue); + } + } + + /** + * Returns a string with HTML escapes changed into their corresponding characters. 
+ * + * @deprecated use {@link StringEscapeUtils#unescapeHtml(String)} instead + */ + @Deprecated + public static String unescape(String s) { + if (s.indexOf('&') == -1) { + return s; + } else { + StringBuffer sb = new StringBuffer(s); + unescape(sb); + return sb.toString(); + } + } + + /** + * Replaces (in-place) HTML escapes in a StringBuffer with their corresponding characters. + * + * @deprecated use {@link StringEscapeUtils#unescapeHtml(String)} instead + */ + @Deprecated + public static StringBuffer unescape(StringBuffer sb) { + int i = 0; // index into the unprocessed section of the buffer + int j = 0; // index into the processed section of the buffer + + while (i < sb.length()) { + char ch = sb.charAt(i); + if (ch == '&') { + int start = i; + String escape = null; + for (i = i + 1; i < sb.length(); i++) { + ch = sb.charAt(i); + if (!Character.isLetterOrDigit(ch) && !(ch == '#' && i == (start + 1))) { + escape = sb.substring(start + 1, i); + break; + } + } + if (i == sb.length() && i != (start + 1)) { + escape = sb.substring(start + 1); + } + if (escape != null) { + Character character = parseReference(escape); + if (character != null + && !((0x0A == character || 0x0D == character || 0x09 == ch) + || (character >= 0x20 && character <= 0xD7FF) + || (character >= 0xE000 && character <= 0xFFFD) || (character >= 0x10000 && character <= 0x10FFFF))) { + // Character is an invalid xml character + // http://www.w3.org/TR/REC-xml/#charsets + character = null; + } + if (character != null) { + ch = character.charValue(); + } else { + // not an HTML escape; rewind + i = start; + ch = '&'; + } + } + } + sb.setCharAt(j, ch); + i++; + j++; + } + + sb.setLength(j); + return sb; + } + + /** + * Parses HTML character and entity references and returns the corresponding character. 
+ */ + private static Character parseReference(String s) { + if (s.length() == 0) { + return null; + } + + if (s.charAt(0) == '#') { + // character reference + if (s.length() == 1) { + return null; + } + + try { + int value; + if (s.charAt(1) == 'x') { + // Hex reference + value = Integer.parseInt(s.substring(2), 16); + } else { + // Decimal reference + value = Integer.parseInt(s.substring(1)); + } + return new Character((char) value); + } catch (NumberFormatException e) { + return null; + } + } else { + return entities.get(s); + } + } + + /** + * Class for current token. + */ + public static class Token { + public static final Type EOF = new Type(); + + public static final Type TEXT = new Type(); + + public static final Type TAG = new Type(); + + public static final Type COMMENT = new Type(); + + /** token's type */ + private Type type; + + /** token's value */ + private final Object value; + + /** whitespace preceding the token */ + private final StringBuffer whitespace; + + /** + * Constructor for the EOF token. + */ + protected Token() { + type = EOF; + value = null; + whitespace = null; + } + + /** + * Constructor for the HTML tag tokens. + */ + protected Token(HtmlTag tag, StringBuffer whitespace) { + type = TAG; + value = tag; + this.whitespace = whitespace; + } + + /** + * Constructor for regular text and comments. + */ + protected Token(StringBuffer text, StringBuffer whitespace, boolean comment) { + if (comment) { + type = COMMENT; + } else { + type = TEXT; + } + this.value = text; + this.whitespace = whitespace; + } + + /** + * Returns the token's type. + */ + public Type getType() { + return type; + } + + /** + * Returns the whitespace preceding the token. + */ + public StringBuffer getWhitespace() { + return whitespace; + } + + /** + * Returns the token's value. This is an HtmlTag for tokens of type <code>TAG</code> and a StringBuffer for + * tokens of type <code>TEXT</code> and <code>COMMENT</code>. 
For tokens of type <code>EOF</code>, the value is + * <code>null</code>. + */ + public Object getValue() { + return value; + } + + /** + * Returns the string representation of the token, including the preceding whitespace. + */ + @Override + public String toString() { + StringBuffer sb = new StringBuffer(); + if (whitespace != null) { + sb.append(whitespace); + } + if (value != null) { + if (type == TAG) { + // sb.append('<'); + } else if (type == COMMENT) { + sb.append("<!--"); //$NON-NLS-1$ + } + sb.append(value); + if (type == TAG) { + // if(value instanceof HtmlTag) { + // HtmlTag htmlTag = (HtmlTag)value; + // if(htmlTag.getTagName().startsWith("?xml")) { + // sb.append("?>"); + // } + // } else { + // sb.append('>'); + + } else if (type == COMMENT) { + sb.append("-->"); //$NON-NLS-1$ + } + + } + return sb.toString(); + } + + /** + * Private enum class for token type. + */ + private static class Type { + private Type() { + // don't need to do anything + } + } + } + + /** + * Enum class for parser state. + */ + private static class State { + static final State EOF = new State(); + + static final State COMMENT = new State(); + + static final State TEXT = new State(); + + static final State TAG = new State(); + + static final State WS = new State(); + + static final State TAG_QUOTE = new State(); + + private State() { + // don't need to do anything + } + } + + /** names and values of HTML entity references */ + private static HashMap<String, Character> entities; + + /* + * Based on ISO 8879. + * + * Portions (c) International Organization for Standardization 1986 + * Permission to copy in any form is granted for use with conforming SGML + * systems and applications as defined in ISO 8879, provided this notice is + * included in all copies. 
+ * + */ + static { + entities = new HashMap<String, Character>(); + entities.put("nbsp", Character.valueOf('\240')); // no-break //$NON-NLS-1$ + // space = + // non-breaking + // space + entities.put("iexcl", Character.valueOf('\241')); // inverted //$NON-NLS-1$ + // exclamation + // mark + entities.put("cent", Character.valueOf('\242')); // cent sign //$NON-NLS-1$ + entities.put("pound", Character.valueOf('\243')); // pound //$NON-NLS-1$ + // sign + entities.put("curren", Character.valueOf('\244')); // currency //$NON-NLS-1$ + // sign + entities.put("yen", Character.valueOf('\245')); // yen sign = //$NON-NLS-1$ + // yuan sign + entities.put("brvbar", Character.valueOf('\246')); // broken //$NON-NLS-1$ + // bar = + // broken + // vertical + // bar + entities.put("sect", Character.valueOf('\247')); // section //$NON-NLS-1$ + // sign + entities.put("uml", Character.valueOf('\250')); // diaeresis = //$NON-NLS-1$ + // spacing + // diaeresis + entities.put("copy", Character.valueOf('\251')); // copyright //$NON-NLS-1$ + // sign + entities.put("ordf", Character.valueOf('\252')); // feminine //$NON-NLS-1$ + // ordinal + // indicator + entities.put("laquo", Character.valueOf('\253')); // left-pointing //$NON-NLS-1$ + // double + // angle + // quotation + // mark = + // left + // pointing + // guillemet + entities.put("not", Character.valueOf('\254')); // not sign //$NON-NLS-1$ + entities.put("shy", Character.valueOf('\255')); // soft hyphen = //$NON-NLS-1$ + // discretionary + // hyphen + entities.put("reg", Character.valueOf('\256')); // registered //$NON-NLS-1$ + // sign = + // registered + // trade mark + // sign + entities.put("macr", Character.valueOf('\257')); // macron = //$NON-NLS-1$ + // spacing + // macron = + // overline + // = APL + // overbar + entities.put("deg", Character.valueOf('\260')); // degree sign //$NON-NLS-1$ + entities.put("plusmn", Character.valueOf('\261')); // plus-minus //$NON-NLS-1$ + // sign = + // plus-or-minus + // sign + 
entities.put("sup2", Character.valueOf('\262')); // superscript //$NON-NLS-1$ + // two = + // superscript + // digit two + // = squared + entities.put("sup3", Character.valueOf('\263')); // superscript //$NON-NLS-1$ + // three = + // superscript + // digit + // three = + // cubed + entities.put("acute", Character.valueOf('\264')); // acute //$NON-NLS-1$ + // accent = + // spacing + // acute + entities.put("micro", Character.valueOf('\265')); // micro //$NON-NLS-1$ + // sign + entities.put("para", Character.valueOf('\266')); // pilcrow //$NON-NLS-1$ + // sign = + // paragraph + // sign + entities.put("middot", Character.valueOf('\267')); // middle //$NON-NLS-1$ + // dot = + // Georgian + // comma = + // Greek + // middle + // dot + entities.put("cedil", Character.valueOf('\270')); // cedilla = //$NON-NLS-1$ + // spacing + // cedilla + entities.put("sup1", Character.valueOf('\271')); // superscript //$NON-NLS-1$ + // one = + // superscript + // digit one + entities.put("ordm", Character.valueOf('\272')); // masculine //$NON-NLS-1$ + // ordinal + // indicator + entities.put("raquo", Character.valueOf('\273')); // right-pointing //$NON-NLS-1$ + // double + // angle + // quotation + // mark = + // right + // pointing + // guillemet + entities.put("frac14", Character.valueOf('\274')); // vulgar //$NON-NLS-1$ + // fraction + // one + // quarter = + // fraction + // one + // quarter + entities.put("frac12", Character.valueOf('\275')); // vulgar //$NON-NLS-1$ + // fraction + // one half + // = + // fraction + // one half + entities.put("frac34", Character.valueOf('\276')); // vulgar //$NON-NLS-1$ + // fraction + // three + // quarters + // = + // fraction + // three + // quarters + entities.put("iquest", Character.valueOf('\277')); // inverted //$NON-NLS-1$ + // question + // mark = + // turned + // question + // mark + entities.put("Agrave", Character.valueOf('\300')); // latin //$NON-NLS-1$ + // capital + // letter A + // with + // grave = + // latin + // capital + // 
letter A + // grave + entities.put("Aacute", Character.valueOf('\301')); // latin //$NON-NLS-1$ + // capital + // letter A + // with + // acute + entities.put("Acirc", Character.valueOf('\302')); // latin //$NON-NLS-1$ + // capital + // letter A + // with + // circumflex + entities.put("Atilde", Character.valueOf('\303')); // latin //$NON-NLS-1$ + // capital + // letter A + // with + // tilde + entities.put("Auml", Character.valueOf('\304')); // latin //$NON-NLS-1$ + // capital + // letter A + // with + // diaeresis + entities.put("Aring", Character.valueOf('\305')); // latin //$NON-NLS-1$ + // capital + // letter A + // with ring + // above = + // latin + // capital + // letter A + // ring + entities.put("AElig", Character.valueOf('\306')); // latin //$NON-NLS-1$ + // capital + // letter AE + // = latin + // capital + // ligature + // AE + entities.put("Ccedil", Character.valueOf('\307')); // latin //$NON-NLS-1$ + // capital + // letter C + // with + // cedilla + entities.put("Egrave", Character.valueOf('\310')); // latin //$NON-NLS-1$ + // capital + // letter E + // with + // grave + entities.put("Eacute", Character.valueOf('\311')); // latin //$NON-NLS-1$ + // capital + // letter E + // with + // acute + entities.put("Ecirc", Character.valueOf('\312')); // latin //$NON-NLS-1$ + // capital + // letter E + // with + // circumflex + entities.put("Euml", Character.valueOf('\313')); // latin //$NON-NLS-1$ + // capital + // letter E + // with + // diaeresis + entities.put("Igrave", Character.valueOf('\314')); // latin //$NON-NLS-1$ + // capital + // letter I + // with + // grave + entities.put("Iacute", Character.valueOf('\315')); // latin //$NON-NLS-1$ + // capital + // letter I + // with + // acute + entities.put("Icirc", Character.valueOf('\316')); // latin //$NON-NLS-1$ + // capital + // letter I + // with + // circumflex + entities.put("Iuml", Character.valueOf('\317')); // latin //$NON-NLS-1$ + // capital + // letter I + // with + // diaeresis + 
entities.put("ETH", Character.valueOf('\320')); // latin capital //$NON-NLS-1$ + // letter ETH + entities.put("Ntilde", Character.valueOf('\321')); // latin //$NON-NLS-1$ + // capital + // letter N + // with + // tilde + entities.put("Ograve", Character.valueOf('\322')); // latin //$NON-NLS-1$ + // capital + // letter O + // with + // grave + entities.put("Oacute", Character.valueOf('\323')); // latin //$NON-NLS-1$ + // capital + // letter O + // with + // acute + entities.put("Ocirc", Character.valueOf('\324')); // latin //$NON-NLS-1$ + // capital + // letter O + // with + // circumflex + entities.put("Otilde", Character.valueOf('\325')); // latin //$NON-NLS-1$ + // capital + // letter O + // with + // tilde + entities.put("Ouml", Character.valueOf('\326')); // latin //$NON-NLS-1$ + // capital + // letter O + // with + // diaeresis + entities.put("times", Character.valueOf('\327')); // multiplication //$NON-NLS-1$ + // sign + entities.put("Oslash", Character.valueOf('\330')); // latin //$NON-NLS-1$ + // capital + // letter O + // with + // stroke = + // latin + // capital + // letter O + // slash + entities.put("Ugrave", Character.valueOf('\331')); // latin //$NON-NLS-1$ + // capital + // letter U + // with + // grave + entities.put("Uacute", Character.valueOf('\332')); // latin //$NON-NLS-1$ + // capital + // letter U + // with + // acute + entities.put("Ucirc", Character.valueOf('\333')); // latin //$NON-NLS-1$ + // capital + // letter U + // with + // circumflex + entities.put("Uuml", Character.valueOf('\334')); // latin //$NON-NLS-1$ + // capital + // letter U + // with + // diaeresis + entities.put("Yacute", Character.valueOf('\335')); // latin //$NON-NLS-1$ + // capital + // letter Y + // with + // acute + entities.put("THORN", Character.valueOf('\336')); // latin //$NON-NLS-1$ + // capital + // letter + // THORN + entities.put("szlig", Character.valueOf('\337')); // latin //$NON-NLS-1$ + // small + // letter + // sharp s = + // ess-zed + 
entities.put("agrave", Character.valueOf('\340')); // latin //$NON-NLS-1$ + // small + // letter a + // with + // grave = + // latin + // small + // letter a + // grave + entities.put("aacute", Character.valueOf('\341')); // latin //$NON-NLS-1$ + // small + // letter a + // with + // acute + entities.put("acirc", Character.valueOf('\342')); // latin //$NON-NLS-1$ + // small + // letter a + // with + // circumflex + entities.put("atilde", Character.valueOf('\343')); // latin //$NON-NLS-1$ + // small + // letter a + // with + // tilde + entities.put("auml", Character.valueOf('\344')); // latin //$NON-NLS-1$ + // small + // letter a + // with + // diaeresis + entities.put("aring", Character.valueOf('\345')); // latin //$NON-NLS-1$ + // small + // letter a + // with ring + // above = + // latin + // small + // letter a + // ring + entities.put("aelig", Character.valueOf('\346')); // latin //$NON-NLS-1$ + // small + // letter ae + // = latin + // small + // ligature + // ae + entities.put("ccedil", Character.valueOf('\347')); // latin //$NON-NLS-1$ + // small + // letter c + // with + // cedilla + entities.put("egrave", Character.valueOf('\350')); // latin //$NON-NLS-1$ + // small + // letter e + // with + // grave + entities.put("eacute", Character.valueOf('\351')); // latin //$NON-NLS-1$ + // small + // letter e + // with + // acute + entities.put("ecirc", Character.valueOf('\352')); // latin //$NON-NLS-1$ + // small + // letter e + // with + // circumflex + entities.put("euml", Character.valueOf('\353')); // latin //$NON-NLS-1$ + // small + // letter e + // with + // diaeresis + entities.put("igrave", Character.valueOf('\354')); // latin //$NON-NLS-1$ + // small + // letter i + // with + // grave + entities.put("iacute", Character.valueOf('\355')); // latin //$NON-NLS-1$ + // small + // letter i + // with + // acute + entities.put("icirc", Character.valueOf('\356')); // latin //$NON-NLS-1$ + // small + // letter i + // with + // circumflex + entities.put("iuml", 
Character.valueOf('\357')); // latin //$NON-NLS-1$ + // small + // letter i + // with + // diaeresis + entities.put("eth", Character.valueOf('\360')); // latin small //$NON-NLS-1$ + // letter eth + entities.put("ntilde", Character.valueOf('\361')); // latin //$NON-NLS-1$ + // small + // letter n + // with + // tilde + entities.put("ograve", Character.valueOf('\362')); // latin //$NON-NLS-1$ + // small + // letter o + // with + // grave + entities.put("oacute", Character.valueOf('\363')); // latin //$NON-NLS-1$ + // small + // letter o + // with + // acute + entities.put("ocirc", Character.valueOf('\364')); // latin //$NON-NLS-1$ + // small + // letter o + // with + // circumflex + entities.put("otilde", Character.valueOf('\365')); // latin //$NON-NLS-1$ + // small + // letter o + // with + // tilde + entities.put("ouml", Character.valueOf('\366')); // latin //$NON-NLS-1$ + // small + // letter o + // with + // diaeresis + entities.put("divide", Character.valueOf('\367')); // division //$NON-NLS-1$ + // sign + entities.put("oslash", Character.valueOf('\370')); // latin //$NON-NLS-1$ + // small + // letter o + // with + // stroke = + // latin + // small + // letter o + // slash + entities.put("ugrave", Character.valueOf('\371')); // latin //$NON-NLS-1$ + // small + // letter u + // with + // grave + entities.put("uacute", Character.valueOf('\372')); // latin //$NON-NLS-1$ + // small + // letter u + // with + // acute + entities.put("ucirc", Character.valueOf('\373')); // latin //$NON-NLS-1$ + // small + // letter u + // with + // circumflex + entities.put("uuml", Character.valueOf('\374')); // latin //$NON-NLS-1$ + // small + // letter u + // with + // diaeresis + entities.put("yacute", Character.valueOf('\375')); // latin //$NON-NLS-1$ + // small + // letter y + // with + // acute + entities.put("thorn", Character.valueOf('\376')); // latin //$NON-NLS-1$ + // small + // letter + // thorn + entities.put("yuml", Character.valueOf('\377')); // latin //$NON-NLS-1$ + // 
small + // letter y + // with + // diaeresis + + // Special characters + entities.put("quot", Character.valueOf('\42')); // quotation //$NON-NLS-1$ + // mark = APL + // quote + entities.put("amp", Character.valueOf('\46')); // ampersand //$NON-NLS-1$ + entities.put("lt", Character.valueOf('\74')); // less-than //$NON-NLS-1$ + // sign + entities.put("gt", Character.valueOf('\76')); // greater-than //$NON-NLS-1$ + // sign + // Latin Extended-A + entities.put("OElig", Character.valueOf('\u0152')); // latin //$NON-NLS-1$ + // capital + // ligature + // OE + entities.put("oelig", Character.valueOf('\u0153')); // latin //$NON-NLS-1$ + // small + // ligature + // oe, + // ligature + // is a + // misnomer, + // this is a + // separate + // character + // in some + // languages + entities.put("Scaron", Character.valueOf('\u0160')); // latin //$NON-NLS-1$ + // capital + // letter + // S + // with + // caron + entities.put("scaron", Character.valueOf('\u0161')); // latin //$NON-NLS-1$ + // small + // letter + // s + // with + // caron + entities.put("Yuml", Character.valueOf('\u0178')); // latin //$NON-NLS-1$ + // capital + // letter Y + // with + // diaeresis + // Spacing Modifier Letters + entities.put("circ", Character.valueOf('\u02c6')); // modifier //$NON-NLS-1$ + // letter + // circumflex + // accent + entities.put("tilde", Character.valueOf('\u02dc')); // small //$NON-NLS-1$ + // tilde + // General punctuation + entities.put("ensp", Character.valueOf('\u2002')); // en space //$NON-NLS-1$ + entities.put("emsp", Character.valueOf('\u2003')); // em space //$NON-NLS-1$ + entities.put("thinsp", Character.valueOf('\u2009')); // thin //$NON-NLS-1$ + // space + entities.put("zwnj", Character.valueOf('\u200c')); // zero //$NON-NLS-1$ + // width + // non-joiner + entities.put("zwj", Character.valueOf('\u200d')); // zero //$NON-NLS-1$ + // width + // joiner + entities.put("lrm", Character.valueOf('\u200e')); // left-to-right //$NON-NLS-1$ + // mark + entities.put("rlm", 
Character.valueOf('\u200f')); // right-to-left //$NON-NLS-1$ + // mark + entities.put("ndash", Character.valueOf('\u2013')); // en dash //$NON-NLS-1$ + entities.put("mdash", Character.valueOf('\u2014')); // em dash //$NON-NLS-1$ + entities.put("lsquo", Character.valueOf('\u2018')); // left //$NON-NLS-1$ + // single + // quotation + // mark + entities.put("rsquo", Character.valueOf('\u2019')); // right //$NON-NLS-1$ + // single + // quotation + // mark + entities.put("sbquo", Character.valueOf('\u201a')); // single //$NON-NLS-1$ + // low-9 + // quotation + // mark + entities.put("ldquo", Character.valueOf('\u201c')); // left //$NON-NLS-1$ + // double + // quotation + // mark + entities.put("rdquo", Character.valueOf('\u201d')); // right //$NON-NLS-1$ + // double + // quotation + // mark + entities.put("bdquo", Character.valueOf('\u201e')); // double //$NON-NLS-1$ + // low-9 + // quotation + // mark + entities.put("dagger", Character.valueOf('\u2020')); // dagger //$NON-NLS-1$ + entities.put("Dagger", Character.valueOf('\u2021')); // double //$NON-NLS-1$ + // dagger + entities.put("permil", Character.valueOf('\u2030')); // per //$NON-NLS-1$ + // mille + // sign + entities.put("lsaquo", Character.valueOf('\u2039')); // single //$NON-NLS-1$ + // left-pointing + // angle + // quotation + // mark, + // not + // yet + // standardized + entities.put("rsaquo", Character.valueOf('\u203a')); // single //$NON-NLS-1$ + // right-pointing + // angle + // quotation + // mark, + // not + // yet + // standardized + entities.put("euro", Character.valueOf('\u20ac')); // euro sign //$NON-NLS-1$ + } +} diff --git a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/commons/core/HtmlTag.java b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/commons/core/HtmlTag.java new file mode 100644 index 0000000..e03f1ac --- a/dev/null +++ b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/commons/core/HtmlTag.java @@ -0,0 +1,374 @@ 
/**
 * Class representing an HTML (3.2) tag and its attributes.
 * <p>
 * Attribute names are stored in lower case (see {@link #setAttribute(String, String)}) and are looked up
 * case-insensitively. Instances are mutable and not synchronized.
 *
 * @author Shawn Minto
 * @since 3.7
 */
public class HtmlTag {
	/** tag's name, without a leading '/' */
	private String tagName;

	/** tag type enum */
	private Tag tagType;

	/** true if the tag is a closing tag */
	private boolean isEndTag;

	/** tag's attributes (keys are lowercase attribute names) */
	private final HashMap<String, String> attributes;

	/** tag's base url, used to resolve relative links; may be <code>null</code> */
	private final URL baseUrl;

	/** tag is self terminated */
	private boolean selfTerminating;

	/**
	 * Basic constructor. The tag is uninitialized.
	 */
	public HtmlTag() {
		tagName = null;
		tagType = Type.UNKNOWN;
		isEndTag = false;
		attributes = new HashMap<String, String>();
		baseUrl = null;
	}

	/**
	 * Copy constructor. Copies name, type, end-tag flag, self-terminating flag, base url and a snapshot of the
	 * attributes of the given tag.
	 */
	public HtmlTag(HtmlTag htmltag) {
		tagName = htmltag.tagName;
		baseUrl = htmltag.baseUrl;
		tagType = htmltag.tagType;
		isEndTag = htmltag.isEndTag;
		// previously not copied, so a copy of "<br/>" rendered as "<br>"
		selfTerminating = htmltag.selfTerminating;
		attributes = new HashMap<String, String>(htmltag.attributes);
	}

	/**
	 * Constructor creating a tag with the given name (see {@link #setTagName(String)}) and no base url.
	 *
	 * @throws IllegalArgumentException
	 *             if the argument is <code>null</code> or does not contain a tag name
	 */
	public HtmlTag(String s) throws ParseException {
		attributes = new HashMap<String, String>();
		setTagName(s);
		baseUrl = null;
	}

	/**
	 * Constructor creating an otherwise empty tag, but with a given base url.
	 */
	public HtmlTag(URL url) {
		tagName = null;
		tagType = Type.UNKNOWN;
		isEndTag = false;
		attributes = new HashMap<String, String>();
		baseUrl = url;
	}

	/**
	 * Returns the tag's type (linked to the tag's name).
	 */
	public Tag getTagType() {
		return tagType;
	}

	/**
	 * Returns the tag's name (e.g., "HEAD", "P", etc.).
	 */
	public String getTagName() {
		return tagName;
	}

	/**
	 * Sets the tag's name and type, if known. A leading '/' marks the tag as an end tag and is not part of the
	 * stored name; a name without a leading '/' marks the tag as a start tag.
	 *
	 * @throws IllegalArgumentException
	 *             if the argument is <code>null</code> or empty string
	 */
	public void setTagName(String s) throws IllegalArgumentException {
		if (s == null || s.length() == 0) {
			throw new IllegalArgumentException("Empty tag name"); //$NON-NLS-1$
		}
		// always recompute the flag so that renaming an end tag does not leave it stale
		boolean endTag = s.charAt(0) == '/';
		String name = endTag ? s.substring(1) : s;
		if (name.length() == 0) {
			throw new IllegalArgumentException("Empty tag name"); //$NON-NLS-1$
		}
		isEndTag = endTag;
		tagName = name;
		Tag type = tags.get(name.toUpperCase(Locale.ENGLISH));
		tagType = (type != null) ? type : Type.UNKNOWN;
	}

	/**
	 * Returns <code>true</code> if the tag is a closing tag.
	 */
	public boolean isEndTag() {
		return isEndTag;
	}

	/**
	 * Returns the value of a tag's attribute as an integer.
	 *
	 * @throws NumberFormatException
	 *             if the attribute does not exist or is not a valid integer
	 */
	public int getIntAttribute(String s) throws NumberFormatException {
		return Integer.parseInt(getAttribute(s));
	}

	/**
	 * Returns the value of a tag's attribute, or NULL if it doesn't exist. The lookup is case-insensitive since
	 * attribute names are stored in lower case.
	 */
	public String getAttribute(String s) {
		if (s == null) {
			return null;
		}
		return attributes.get(s.toLowerCase(Locale.ENGLISH));
	}

	/**
	 * Returns <code>true</code> if the tag contains attribute with the given name.
	 */
	public boolean hasAttribute(String s) {
		return getAttribute(s) != null;
	}

	/**
	 * Sets the value of a tag's attribute. The name is stored in lower case.
	 */
	public void setAttribute(String name, String value) {
		attributes.put(name.toLowerCase(Locale.ENGLISH), value);
	}

	/**
	 * Collects the link targets of the <code>href</code> and <code>src</code> attributes.
	 * <p>
	 * Targets ending in <code>.jpg</code>, <code>.gif</code>, <code>.css</code> or <code>.js</code>, targets
	 * starting with <code>mailto</code>, targets containing <code>#</code> and empty targets are skipped. Of the
	 * absolute targets only those starting with <code>http://</code> are returned. Relative targets are resolved
	 * against the base url and skipped if the tag has none. Accepted targets are appended without a separator.
	 *
	 * @return the collected URLs, never <code>null</code>
	 */
	public StringBuffer getURLs() {
		StringBuffer sb = new StringBuffer();

		for (java.util.Map.Entry<String, String> attribute : attributes.entrySet()) {
			String attributeName = attribute.getKey();
			if (!"href".equals(attributeName) && !"src".equals(attributeName)) { //$NON-NLS-1$ //$NON-NLS-2$
				continue;
			}
			String target = attribute.getValue();
			if (target == null) {
				continue;
			}
			target = stripLeadingWhitespace(target);
			if (!isCandidateLink(target)) {
				continue;
			}
			target = target.replace('\\', '/');

			if (target.startsWith("news:") || (target.indexOf("://") != -1 && target.length() >= 7)) { //$NON-NLS-1$ //$NON-NLS-2$
				// Absolute URL; regionMatches is safe for targets shorter than the prefix (e.g. "news:a")
				if (target.regionMatches(true, 0, "http://", 0, 7)) { //$NON-NLS-1$
					sb.append(target);
				}
			} else if (baseUrl != null) {
				// Relative URL; can only be resolved if a base url is known
				appendRelativeUrl(sb, target);
			}
		}

		return sb;
	}

	/** Returns the given string without its leading whitespace characters. */
	private static String stripLeadingWhitespace(String s) {
		int start = 0;
		while (start < s.length() && Character.isWhitespace(s.charAt(start))) {
			start++;
		}
		return s.substring(start);
	}

	/** Returns <code>true</code> if the target is non-empty and not one of the ignored link kinds. */
	private static boolean isCandidateLink(String target) {
		return target.length() > 0 && !target.endsWith(".jpg") && !target.endsWith(".gif") //$NON-NLS-1$ //$NON-NLS-2$
				&& !target.endsWith(".css") && !target.endsWith(".js") //$NON-NLS-1$ //$NON-NLS-2$
				&& !target.startsWith("mailto") && target.lastIndexOf("#") == -1; //$NON-NLS-1$ //$NON-NLS-2$
	}

	/** Returns the directory part of the base url's path, without a trailing '/' or trailing "/." segments. */
	private String getBaseDirectory() {
		String baseDir = baseUrl.getPath();
		int lastSep = -1;
		for (int i = 0; i < baseDir.length(); i++) {
			char ch = baseDir.charAt(i);
			if (ch == '/') {
				lastSep = i;
			} else if (ch == '?') {
				break;
			}
		}
		if (lastSep >= 0) {
			baseDir = baseDir.substring(0, lastSep);
		}
		while (baseDir.length() > 1 && baseDir.endsWith("/.")) { //$NON-NLS-1$
			baseDir = baseDir.substring(0, baseDir.length() - 2);
		}
		return baseDir;
	}

	/** Resolves a relative target against the (non-null) base url and appends the result to the buffer. */
	private void appendRelativeUrl(StringBuffer sb, String target) {
		if (target.startsWith("//")) { //$NON-NLS-1$
			// protocol-relative
			sb.append(baseUrl.getProtocol() + ":" + target); //$NON-NLS-1$
		} else if (target.startsWith("/")) { //$NON-NLS-1$
			// host-relative
			sb.append(baseUrl.getProtocol() + "://" + baseUrl.getHost() + target); //$NON-NLS-1$
		} else {
			String baseDir = getBaseDirectory();
			while (target.startsWith("../")) { //$NON-NLS-1$
				// can't go above root
				int sep = baseDir.lastIndexOf('/');
				if (sep >= 0) {
					baseDir = baseDir.substring(0, sep);
				}
				target = target.substring(3);
			}
			sb.append(baseUrl.getProtocol() + "://" + baseUrl.getHost() + baseDir + "/" + target); //$NON-NLS-1$ //$NON-NLS-2$
		}
	}

	/**
	 * Returns the tag in HTML syntax. Attribute values are written as stored, without escaping; a
	 * <code>null</code> value is written as an empty value.
	 */
	@Override
	public String toString() {
		StringBuffer sb = new StringBuffer();
		sb.append('<');
		if (isEndTag) {
			sb.append('/');
		}
		sb.append(tagName);
		for (java.util.Map.Entry<String, String> attribute : attributes.entrySet()) {
			sb.append(' ');
			sb.append(attribute.getKey());
			sb.append("=\""); //$NON-NLS-1$
			String value = attribute.getValue();
			if (value != null && value.length() > 0) {
				sb.append(value);
			}
			sb.append('"');
		}
		if (selfTerminating) {
			sb.append('/');
		}
		sb.append('>');

		return sb.toString();
	}

	/**
	 * Enum class for tag types. Adds the tag types used by this class that {@link Tag} does not define.
	 */
	public static class Type extends Tag {
		public static final Tag UNKNOWN = new Tag();

		public static final Tag THEAD = new Type("THEAD"); //$NON-NLS-1$

		public static final Tag DOCTYPE = new Type("!DOCTYPE"); //$NON-NLS-1$

		public static final Tag LABEL = new Type("LABEL"); //$NON-NLS-1$

		private Type(String name) {
			super(name);
		}
	}

	/** known tag types, keyed by upper-case tag name */
	private static final HashMap<String, Tag> tags = new HashMap<String, Tag>();
	static {
		tags.put("A", Tag.A); //$NON-NLS-1$
		tags.put("ADDRESS", Tag.ADDRESS); //$NON-NLS-1$
		tags.put("APPLET", Tag.APPLET); //$NON-NLS-1$
		tags.put("AREA", Tag.AREA); //$NON-NLS-1$
		tags.put("B", Tag.B); //$NON-NLS-1$
		tags.put("BASE", Tag.BASE); //$NON-NLS-1$
		tags.put("BASEFONT", Tag.BASEFONT); //$NON-NLS-1$
		tags.put("BIG", Tag.BIG); //$NON-NLS-1$
		tags.put("BLOCKQUOTE", Tag.BLOCKQUOTE); //$NON-NLS-1$
		tags.put("BODY", Tag.BODY); //$NON-NLS-1$
		tags.put("BR", Tag.BR); //$NON-NLS-1$
		tags.put("CAPTION", Tag.CAPTION); //$NON-NLS-1$
		tags.put("CENTER", Tag.CENTER); //$NON-NLS-1$
		tags.put("CITE", Tag.CITE); //$NON-NLS-1$
		tags.put("CODE", Tag.CODE); //$NON-NLS-1$
		tags.put("DD", Tag.DD); //$NON-NLS-1$
		tags.put("DFN", Tag.DFN); //$NON-NLS-1$
		tags.put("DIR", Tag.DIR); //$NON-NLS-1$
		tags.put("DIV", Tag.DIV); //$NON-NLS-1$
		tags.put("DL", Tag.DL); //$NON-NLS-1$
		tags.put("!DOCTYPE", Type.DOCTYPE); //$NON-NLS-1$
		tags.put("DT", Tag.DT); //$NON-NLS-1$
		tags.put("EM", Tag.EM); //$NON-NLS-1$
		tags.put("FONT", Tag.FONT); //$NON-NLS-1$
		tags.put("FORM", Tag.FORM); //$NON-NLS-1$
		tags.put("FRAME", Tag.FRAME); //$NON-NLS-1$
		tags.put("FRAMESET", Tag.FRAMESET); //$NON-NLS-1$
		tags.put("H1", Tag.H1); //$NON-NLS-1$
		tags.put("H2", Tag.H2); //$NON-NLS-1$
		tags.put("H3", Tag.H3); //$NON-NLS-1$
		tags.put("H4", Tag.H4); //$NON-NLS-1$
		tags.put("H5", Tag.H5); //$NON-NLS-1$
		tags.put("H6", Tag.H6); //$NON-NLS-1$
		tags.put("HEAD", Tag.HEAD); //$NON-NLS-1$
		tags.put("HTML", Tag.HTML); //$NON-NLS-1$
		tags.put("HR", Tag.HR); //$NON-NLS-1$
		tags.put("I", Tag.I); //$NON-NLS-1$
		tags.put("IMG", Tag.IMG); //$NON-NLS-1$
		tags.put("INPUT", Tag.INPUT); //$NON-NLS-1$
		tags.put("ISINDEX", Tag.ISINDEX); //$NON-NLS-1$
		tags.put("KBD", Tag.KBD); //$NON-NLS-1$
		tags.put("LI", Tag.LI); //$NON-NLS-1$
		tags.put("LABEL", Type.LABEL); //$NON-NLS-1$
		tags.put("LINK", Tag.LINK); //$NON-NLS-1$
		tags.put("MAP", Tag.MAP); //$NON-NLS-1$
		tags.put("MENU", Tag.MENU); //$NON-NLS-1$
		tags.put("META", Tag.META); //$NON-NLS-1$
		tags.put("NOFRAMES", Tag.NOFRAMES); //$NON-NLS-1$
		tags.put("OBJECT", Tag.OBJECT); //$NON-NLS-1$
		tags.put("OL", Tag.OL); //$NON-NLS-1$
		tags.put("OPTION", Tag.OPTION); //$NON-NLS-1$
		tags.put("P", Tag.P); //$NON-NLS-1$
		tags.put("PARAM", Tag.PARAM); //$NON-NLS-1$
		tags.put("PRE", Tag.PRE); //$NON-NLS-1$
		tags.put("S", Tag.S); //$NON-NLS-1$
		tags.put("SAMP", Tag.SAMP); //$NON-NLS-1$
		tags.put("SCRIPT", Tag.SCRIPT); //$NON-NLS-1$
		tags.put("SELECT", Tag.SELECT); //$NON-NLS-1$
		tags.put("SMALL", Tag.SMALL); //$NON-NLS-1$
		tags.put("SPAN", Tag.SPAN); //$NON-NLS-1$
		tags.put("STRONG", Tag.STRONG); //$NON-NLS-1$
		tags.put("STYLE", Tag.STYLE); //$NON-NLS-1$
		tags.put("SUB", Tag.SUB); //$NON-NLS-1$
		tags.put("SUP", Tag.SUP); //$NON-NLS-1$
		tags.put("TABLE", Tag.TABLE); //$NON-NLS-1$
		tags.put("TD", Tag.TD); //$NON-NLS-1$
		tags.put("TEXTAREA", Tag.TEXTAREA); //$NON-NLS-1$
		tags.put("TH", Tag.TH); //$NON-NLS-1$
		tags.put("THEAD", Type.THEAD); //$NON-NLS-1$
		tags.put("TITLE", Tag.TITLE); //$NON-NLS-1$
		tags.put("TR", Tag.TR); //$NON-NLS-1$
		tags.put("TT", Tag.TT); //$NON-NLS-1$
		tags.put("U", Tag.U); //$NON-NLS-1$
		tags.put("UL", Tag.UL); //$NON-NLS-1$
		tags.put("VAR", Tag.VAR); //$NON-NLS-1$
	}

	/**
	 * Sets whether the tag is self terminated (rendered with a trailing '/').
	 */
	public void setSelfTerminating(boolean terminating) {
		this.selfTerminating = terminating;
	}

	/**
	 * Returns <code>true</code> if the tag is self terminated.
	 */
	public boolean isSelfTerminating() {
		return selfTerminating;
	}
}
a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/commons/core/HtmlUtil.java b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/commons/core/HtmlUtil.java index 471fb54..c24e6f9 100644 --- a/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/commons/core/HtmlUtil.java +++ b/org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/commons/core/HtmlUtil.java @@ -13,7 +13,9 @@ package org.eclipse.mylyn.commons.core; import java.io.IOException; import java.io.StringReader; +import java.text.ParseException; +import org.eclipse.mylyn.commons.core.HtmlStreamTokenizer.Token; import org.eclipse.mylyn.internal.commons.core.Html2TextReader; /** @@ -41,4 +43,22 @@ public class HtmlUtil { return sb.toString(); } + /** + * @since 3.7 + */ + public static String getTextContent(HtmlStreamTokenizer tokenizer) throws IOException, ParseException { + StringBuilder sb = new StringBuilder(); + for (Token token = tokenizer.nextToken(); token.getType() != Token.EOF; token = tokenizer.nextToken()) { + if (token.getType() == Token.TEXT) { + sb.append(token.toString().trim()); + sb.append(" "); //$NON-NLS-1$ + } else if (token.getType() == Token.COMMENT) { + // ignore + } else { + break; + } + } + return sb.toString().trim(); + } + } diff --git a/org.eclipse.mylyn.commons.net/src/org/eclipse/mylyn/commons/net/HtmlStreamTokenizer.java b/org.eclipse.mylyn.commons.net/src/org/eclipse/mylyn/commons/net/HtmlStreamTokenizer.java index f3194c4..c374b44 100644 --- a/org.eclipse.mylyn.commons.net/src/org/eclipse/mylyn/commons/net/HtmlStreamTokenizer.java +++ b/org.eclipse.mylyn.commons.net/src/org/eclipse/mylyn/commons/net/HtmlStreamTokenizer.java @@ -26,7 +26,9 @@ import org.apache.commons.lang.StringEscapeUtils; * * @author Shawn Minto * @since 2.0 + * @deprecated use org.eclipse.mylyn.commons.core.HtmlStreamTokenizer instead. 
*/ +@Deprecated public class HtmlStreamTokenizer { /** parser state */ diff --git a/org.eclipse.mylyn.commons.net/src/org/eclipse/mylyn/commons/net/HtmlTag.java b/org.eclipse.mylyn.commons.net/src/org/eclipse/mylyn/commons/net/HtmlTag.java index f890a56..bf761f4 100644 --- a/org.eclipse.mylyn.commons.net/src/org/eclipse/mylyn/commons/net/HtmlTag.java +++ b/org.eclipse.mylyn.commons.net/src/org/eclipse/mylyn/commons/net/HtmlTag.java @@ -22,9 +22,11 @@ import javax.swing.text.html.HTML.Tag; /** * Class representing an HTML (3.2) tag and its attributes. * + * @deprecated use org.eclipse.mylyn.commons.core.HtmlTag instead. * @author Shawn Minto * @since 2.0 */ +@Deprecated public class HtmlTag { /** tag's name */ private String tagName; diff --git a/org.eclipse.mylyn.commons.repositories.http.core/src/org/eclipse/mylyn/commons/repositories/http/core/CommonHttpClient.java b/org.eclipse.mylyn.commons.repositories.http.core/src/org/eclipse/mylyn/commons/repositories/http/core/CommonHttpClient.java index 87404aa..47725df 100644 --- a/org.eclipse.mylyn.commons.repositories.http.core/src/org/eclipse/mylyn/commons/repositories/http/core/CommonHttpClient.java +++ b/org.eclipse.mylyn.commons.repositories.http.core/src/org/eclipse/mylyn/commons/repositories/http/core/CommonHttpClient.java @@ -19,6 +19,7 @@ import org.apache.http.client.methods.HttpRequestBase; import org.apache.http.conn.ClientConnectionManager; import org.apache.http.impl.client.AbstractHttpClient; import org.apache.http.impl.client.ContentEncodingHttpClient; +import org.apache.http.protocol.HttpContext; import org.apache.http.protocol.SyncBasicHttpContext; import org.eclipse.core.runtime.IProgressMonitor; import org.eclipse.mylyn.commons.core.operations.IOperationMonitor; @@ -50,6 +51,10 @@ public class CommonHttpClient { return HttpUtil.execute(getHttpClient(), HttpUtil.createHost(request), context, request, monitor); } + public HttpContext getContext() { + return context; + } + public synchronized 
AbstractHttpClient getHttpClient() { if (httpClient == null) { httpClient = createHttpClient(null); |

