diff options
Diffstat (limited to 'org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/commons/core/HtmlTag.java')
-rw-r--r-- | org.eclipse.mylyn.commons.core/src/org/eclipse/mylyn/commons/core/HtmlTag.java | 374 |
1 files changed, 374 insertions, 0 deletions
/*******************************************************************************
 * Copyright (c) 2004, 2009 Tasktop Technologies and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     Tasktop Technologies - initial API and implementation
 *******************************************************************************/

import java.net.URL;
import java.text.ParseException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Locale;
import java.util.Map;

import javax.swing.text.html.HTML.Tag;

/**
 * Represents an HTML (3.2) tag and its attributes.
 * <p>
 * Attribute names are stored in lower case, so attribute lookups are case-insensitive.
 *
 * @author Shawn Minto
 * @since 3.7
 */
public class HtmlTag {

    /** Known tag names (upper case) mapped to their tag type. */
    private static final Map<String, Tag> TAGS = createTagTable();

    /** tag's name, exactly as passed to {@link #setTagName(String)} minus a leading '/' */
    private String tagName;

    /** tag type; {@link Type#UNKNOWN} when the name is not in the tag table */
    private Tag tagType;

    /** true if the tag is a closing tag */
    private boolean isEndTag;

    /** tag's attributes (keys are lowercase attribute names) */
    private final Map<String, String> attributes;

    /** base url used to resolve relative links; may be <code>null</code> */
    private final URL baseUrl;

    /** tag is self terminated */
    private boolean selfTerminating;

    /**
     * Basic constructor. The tag is uninitialized and has no base url.
     */
    public HtmlTag() {
        this((URL) null);
    }

    /**
     * Copy constructor. Copies name, type, flags (including the self-terminating flag), base url and a snapshot of
     * the attributes; later attribute changes on either tag do not affect the other.
     */
    public HtmlTag(HtmlTag htmltag) {
        tagName = htmltag.tagName;
        tagType = htmltag.tagType;
        isEndTag = htmltag.isEndTag;
        selfTerminating = htmltag.selfTerminating;
        baseUrl = htmltag.baseUrl;
        attributes = new HashMap<String, String>(htmltag.attributes);
    }

    /**
     * Constructor creating a tag with the given name and no base url.
     *
     * @param s
     *            the tag name, optionally prefixed with '/' for a closing tag
     * @throws ParseException
     *             declared for API compatibility
     * @throws IllegalArgumentException
     *             if the name is <code>null</code> or empty
     */
    public HtmlTag(String s) throws ParseException {
        attributes = new HashMap<String, String>();
        baseUrl = null;
        setTagName(s);
    }

    /**
     * Constructor creating an otherwise empty tag, but with a given base url.
     */
    public HtmlTag(URL url) {
        tagName = null;
        tagType = Type.UNKNOWN;
        isEndTag = false;
        attributes = new HashMap<String, String>();
        baseUrl = url;
    }

    /**
     * Returns the tag's type (linked to the tag's name).
     */
    public Tag getTagType() {
        return tagType;
    }

    /**
     * Returns the tag's name (e.g., "HEAD", "P", etc.).
     */
    public String getTagName() {
        return tagName;
    }

    /**
     * Sets the tag's name and type, if known. A leading '/' marks the tag as a closing tag and is not part of the
     * stored name. The closing-tag flag is only ever set by this method, never cleared. The tag is left unchanged
     * when the argument is rejected.
     *
     * @throws IllegalArgumentException
     *             if the argument is <code>null</code>, an empty string, or just "/"
     */
    public void setTagName(String s) throws IllegalArgumentException {
        if (s == null || s.length() == 0) {
            throw new IllegalArgumentException("Empty tag name"); //$NON-NLS-1$
        }
        boolean closing = s.charAt(0) == '/';
        String name = closing ? s.substring(1) : s;
        if (name.length() == 0) {
            // validate before touching any field so a rejected name cannot leave isEndTag half-updated
            throw new IllegalArgumentException("Empty tag name"); //$NON-NLS-1$
        }
        if (closing) {
            isEndTag = true;
        }
        tagName = name;
        Tag known = TAGS.get(name.toUpperCase(Locale.ENGLISH));
        tagType = known != null ? known : Type.UNKNOWN;
    }

    /**
     * Returns <code>true</code> if the tag is a closing tag.
     */
    public boolean isEndTag() {
        return isEndTag;
    }

    /**
     * Returns the value of a tag's attribute as an integer.
     *
     * @throws NumberFormatException
     *             if the attribute is missing or its value is not a parsable integer
     */
    public int getIntAttribute(String s) throws NumberFormatException {
        return Integer.parseInt(getAttribute(s));
    }

    /**
     * Returns the value of a tag's attribute, or <code>null</code> if it doesn't exist. The lookup is
     * case-insensitive, matching the lower-casing done by {@link #setAttribute(String, String)}.
     */
    public String getAttribute(String s) {
        if (s == null) {
            return null;
        }
        return attributes.get(s.toLowerCase(Locale.ENGLISH));
    }

    /**
     * Returns <code>true</code> if the tag contains attribute with the given name and a non-null value.
     */
    public boolean hasAttribute(String s) {
        return getAttribute(s) != null;
    }

    /**
     * Sets the value of a tag's attribute. The name is stored in lower case.
     */
    public void setAttribute(String name, String value) {
        attributes.put(name.toLowerCase(Locale.ENGLISH), value);
    }

    /**
     * Collects the link targets found in the tag's <code>href</code> and <code>src</code> attributes.
     * <p>
     * Targets ending in .jpg, .gif, .css or .js, starting with "mailto", containing '#', or empty (after removing
     * leading whitespace) are ignored. Absolute targets are only kept when they start with "http://" (any case).
     * Relative targets are resolved against the base url and are skipped when the tag has no base url.
     *
     * @return a buffer with all accepted URLs appended one after another without any separator; never
     *         <code>null</code>
     */
    public StringBuffer getURLs() {
        StringBuffer sb = new StringBuffer();
        for (Map.Entry<String, String> attribute : attributes.entrySet()) {
            String name = attribute.getKey();
            if (!"href".equals(name) && !"src".equals(name)) { //$NON-NLS-1$ //$NON-NLS-2$
                continue;
            }
            String target = attribute.getValue();
            if (target == null || !isLinkCandidate(target)) {
                continue;
            }
            target = stripLeadingWhitespace(target).replace('\\', '/');
            if (target.length() == 0) {
                continue;
            }
            if (target.startsWith("news:") || (target.indexOf("://") != -1 && target.length() >= 7)) { //$NON-NLS-1$ //$NON-NLS-2$
                // Absolute URL: regionMatches is length-safe, unlike substring(0, 7) on short "news:" targets
                if (target.regionMatches(true, 0, "http://", 0, 7)) { //$NON-NLS-1$
                    sb.append(target);
                }
            } else if (baseUrl != null) {
                // Relative URL
                sb.append(resolveRelative(target));
            }
        }
        return sb;
    }

    /** Returns <code>true</code> unless the raw attribute value is one of the link kinds that are never reported. */
    private static boolean isLinkCandidate(String target) {
        return target.length() > 0 && !target.endsWith(".jpg") && !target.endsWith(".gif") //$NON-NLS-1$ //$NON-NLS-2$
                && !target.endsWith(".css") && !target.endsWith(".js") //$NON-NLS-1$ //$NON-NLS-2$
                && !target.startsWith("mailto") && target.lastIndexOf("#") == -1; //$NON-NLS-1$ //$NON-NLS-2$
    }

    /** Returns the text without its leading whitespace characters (empty if it is all whitespace). */
    private static String stripLeadingWhitespace(String text) {
        int start = 0;
        while (start < text.length() && Character.isWhitespace(text.charAt(start))) {
            start++;
        }
        return text.substring(start);
    }

    /** Resolves a relative link target against the (non-null) base url. */
    private String resolveRelative(String target) {
        if (target.startsWith("//")) { //$NON-NLS-1$
            // protocol-relative: the target carries its own host
            return baseUrl.getProtocol() + ":" + target; //$NON-NLS-1$
        }
        if (target.startsWith("/")) { //$NON-NLS-1$
            return baseRoot() + target;
        }
        String baseDir = baseDirectory();
        String remainder = target;
        while (remainder.startsWith("../")) { //$NON-NLS-1$
            int parentEnd = baseDir.lastIndexOf('/');
            if (parentEnd >= 0) {
                // can't go above root: once baseDir has no '/' left it stays as it is
                baseDir = baseDir.substring(0, parentEnd);
            }
            remainder = remainder.substring(3);
        }
        return baseRoot() + baseDir + "/" + remainder; //$NON-NLS-1$
    }

    /** Returns "protocol://host" of the base url, plus ":port" when the base url names an explicit port. */
    private String baseRoot() {
        StringBuilder root = new StringBuilder();
        root.append(baseUrl.getProtocol()).append("://").append(baseUrl.getHost()); //$NON-NLS-1$
        int port = baseUrl.getPort();
        if (port >= 0) {
            root.append(':').append(port);
        }
        return root.toString();
    }

    /** Returns the directory part of the base url's path, without a trailing '/' and without trailing "/." parts. */
    private String baseDirectory() {
        String path = baseUrl.getPath();
        // only separators located before a '?' count
        int queryStart = path.indexOf('?');
        String scanned = queryStart >= 0 ? path.substring(0, queryStart) : path;
        int lastSep = scanned.lastIndexOf('/');
        String baseDir = lastSep >= 0 ? path.substring(0, lastSep) : path;
        while (baseDir.length() > 1 && baseDir.endsWith("/.")) { //$NON-NLS-1$
            baseDir = baseDir.substring(0, baseDir.length() - 2);
        }
        return baseDir;
    }

    /**
     * Returns the tag in markup form, e.g. <code>&lt;a href="x"&gt;</code>. Every attribute is written as
     * <code>name="value"</code>; a <code>null</code> value is written as an empty string. Attribute values are not
     * escaped.
     */
    @Override
    public String toString() {
        StringBuilder sb = new StringBuilder();
        sb.append('<');
        if (isEndTag) {
            sb.append('/');
        }
        sb.append(tagName);
        for (Map.Entry<String, String> attribute : attributes.entrySet()) {
            sb.append(' ').append(attribute.getKey()).append("=\""); //$NON-NLS-1$
            String value = attribute.getValue();
            if (value != null) {
                sb.append(value);
            }
            sb.append('"');
        }
        if (selfTerminating) {
            sb.append('/');
        }
        sb.append('>');
        return sb.toString();
    }

    /**
     * Marks the tag as self terminated (written with a trailing '/' by {@link #toString()}).
     */
    public void setSelfTerminating(boolean terminating) {
        this.selfTerminating = terminating;
    }

    /**
     * Returns <code>true</code> if the tag is self terminated.
     */
    public boolean isSelfTerminating() {
        return selfTerminating;
    }

    /**
     * Enum class for tag types: the unknown-tag marker plus tag types that are registered in the tag table via this
     * type rather than via a {@link Tag} constant.
     */
    public static class Type extends Tag {
        public static final Tag UNKNOWN = new Tag();

        public static final Tag THEAD = new Type("THEAD"); //$NON-NLS-1$

        public static final Tag DOCTYPE = new Type("!DOCTYPE"); //$NON-NLS-1$

        public static final Tag LABEL = new Type("LABEL"); //$NON-NLS-1$

        private Type(String name) {
            super(name);
        }
    }

    /** Builds the lookup table from upper-case tag name to tag type. */
    private static Map<String, Tag> createTagTable() {
        Map<String, Tag> table = new HashMap<String, Tag>();
        table.put("A", Tag.A); //$NON-NLS-1$
        table.put("ADDRESS", Tag.ADDRESS); //$NON-NLS-1$
        table.put("APPLET", Tag.APPLET); //$NON-NLS-1$
        table.put("AREA", Tag.AREA); //$NON-NLS-1$
        table.put("B", Tag.B); //$NON-NLS-1$
        table.put("BASE", Tag.BASE); //$NON-NLS-1$
        table.put("BASEFONT", Tag.BASEFONT); //$NON-NLS-1$
        table.put("BIG", Tag.BIG); //$NON-NLS-1$
        table.put("BLOCKQUOTE", Tag.BLOCKQUOTE); //$NON-NLS-1$
        table.put("BODY", Tag.BODY); //$NON-NLS-1$
        table.put("BR", Tag.BR); //$NON-NLS-1$
        table.put("CAPTION", Tag.CAPTION); //$NON-NLS-1$
        table.put("CENTER", Tag.CENTER); //$NON-NLS-1$
        table.put("CITE", Tag.CITE); //$NON-NLS-1$
        table.put("CODE", Tag.CODE); //$NON-NLS-1$
        table.put("DD", Tag.DD); //$NON-NLS-1$
        table.put("DFN", Tag.DFN); //$NON-NLS-1$
        table.put("DIR", Tag.DIR); //$NON-NLS-1$
        table.put("DIV", Tag.DIV); //$NON-NLS-1$
        table.put("DL", Tag.DL); //$NON-NLS-1$
        table.put("!DOCTYPE", Type.DOCTYPE); //$NON-NLS-1$
        table.put("DT", Tag.DT); //$NON-NLS-1$
        table.put("EM", Tag.EM); //$NON-NLS-1$
        table.put("FONT", Tag.FONT); //$NON-NLS-1$
        table.put("FORM", Tag.FORM); //$NON-NLS-1$
        table.put("FRAME", Tag.FRAME); //$NON-NLS-1$
        table.put("FRAMESET", Tag.FRAMESET); //$NON-NLS-1$
        table.put("H1", Tag.H1); //$NON-NLS-1$
        table.put("H2", Tag.H2); //$NON-NLS-1$
        table.put("H3", Tag.H3); //$NON-NLS-1$
        table.put("H4", Tag.H4); //$NON-NLS-1$
        table.put("H5", Tag.H5); //$NON-NLS-1$
        table.put("H6", Tag.H6); //$NON-NLS-1$
        table.put("HEAD", Tag.HEAD); //$NON-NLS-1$
        table.put("HTML", Tag.HTML); //$NON-NLS-1$
        table.put("HR", Tag.HR); //$NON-NLS-1$
        table.put("I", Tag.I); //$NON-NLS-1$
        table.put("IMG", Tag.IMG); //$NON-NLS-1$
        table.put("INPUT", Tag.INPUT); //$NON-NLS-1$
        table.put("ISINDEX", Tag.ISINDEX); //$NON-NLS-1$
        table.put("KBD", Tag.KBD); //$NON-NLS-1$
        table.put("LI", Tag.LI); //$NON-NLS-1$
        table.put("LABEL", Type.LABEL); //$NON-NLS-1$
        table.put("LINK", Tag.LINK); //$NON-NLS-1$
        table.put("MAP", Tag.MAP); //$NON-NLS-1$
        table.put("MENU", Tag.MENU); //$NON-NLS-1$
        table.put("META", Tag.META); //$NON-NLS-1$
        table.put("NOFRAMES", Tag.NOFRAMES); //$NON-NLS-1$
        table.put("OBJECT", Tag.OBJECT); //$NON-NLS-1$
        table.put("OL", Tag.OL); //$NON-NLS-1$
        table.put("OPTION", Tag.OPTION); //$NON-NLS-1$
        table.put("P", Tag.P); //$NON-NLS-1$
        table.put("PARAM", Tag.PARAM); //$NON-NLS-1$
        table.put("PRE", Tag.PRE); //$NON-NLS-1$
        table.put("S", Tag.S); //$NON-NLS-1$
        table.put("SAMP", Tag.SAMP); //$NON-NLS-1$
        table.put("SCRIPT", Tag.SCRIPT); //$NON-NLS-1$
        table.put("SELECT", Tag.SELECT); //$NON-NLS-1$
        table.put("SMALL", Tag.SMALL); //$NON-NLS-1$
        table.put("SPAN", Tag.SPAN); //$NON-NLS-1$
        table.put("STRONG", Tag.STRONG); //$NON-NLS-1$
        table.put("STYLE", Tag.STYLE); //$NON-NLS-1$
        table.put("SUB", Tag.SUB); //$NON-NLS-1$
        table.put("SUP", Tag.SUP); //$NON-NLS-1$
        table.put("TABLE", Tag.TABLE); //$NON-NLS-1$
        table.put("TD", Tag.TD); //$NON-NLS-1$
        table.put("TEXTAREA", Tag.TEXTAREA); //$NON-NLS-1$
        table.put("TH", Tag.TH); //$NON-NLS-1$
        table.put("THEAD", Type.THEAD); //$NON-NLS-1$
        table.put("TITLE", Tag.TITLE); //$NON-NLS-1$
        table.put("TR", Tag.TR); //$NON-NLS-1$
        table.put("TT", Tag.TT); //$NON-NLS-1$
        table.put("U", Tag.U); //$NON-NLS-1$
        table.put("UL", Tag.UL); //$NON-NLS-1$
        table.put("VAR", Tag.VAR); //$NON-NLS-1$
        return table;
    }
}