1 files changed, 0 insertions, 212 deletions
diff --git a/extraplugins/epf-richtext/org.eclipse.epf.common.html/src/org/eclipse/epf/common/html/DefaultHTMLParser.java b/extraplugins/epf-richtext/org.eclipse.epf.common.html/src/org/eclipse/epf/common/html/DefaultHTMLParser.java
deleted file mode 100644
index 9e99d55b3ed..00000000000
--- a/extraplugins/epf-richtext/org.eclipse.epf.common.html/src/org/eclipse/epf/common/html/DefaultHTMLParser.java
+++ /dev/null
@@ -1,212 +0,0 @@
-//------------------------------------------------------------------------------
-// Copyright (c) 2005, 2006 IBM Corporation and others.
-// All rights reserved. This program and the accompanying materials
-// are made available under the terms of the Eclipse Public License v1.0
-// which accompanies this distribution, and is available at
-// http://www.eclipse.org/legal/epl-v10.html
-//
-// Contributors:
-// IBM Corporation - initial implementation
-//------------------------------------------------------------------------------
-package org.eclipse.epf.common.html;
-
-import java.io.BufferedReader;
-import java.io.ByteArrayInputStream;
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStreamReader;
-import java.io.PrintWriter;
-import java.io.StringWriter;
-import java.util.Properties;
-
-import org.eclipse.epf.common.IHTMLParser;
-import org.w3c.dom.Document;
-import org.w3c.dom.NamedNodeMap;
-import org.w3c.dom.Node;
-import org.w3c.dom.NodeList;
-import org.w3c.tidy.Configuration;
-import org.w3c.tidy.Tidy;
-
-/**
- * Extracts the title, meta tags and text from a HTML file or source.
- * 
- * @author Kelvin Low
- * @since 1.0
- */
-public class DefaultHTMLParser implements IHTMLParser{
-
-	private static final int BUFFER_SIZE = 4096;
-
-	private static final String HTML_SCRIPT_TAG = "script"; //$NON-NLS-1$	
-
-	private static final String HTML_TITLE_TAG = "title"; //$NON-NLS-1$
-
-	private static final String HTML_META_TAG = "meta"; //$NON-NLS-1$
-
-	protected Tidy tidy;
-
-	private String title;
-
-	private String summary;
-
-	private String text;
-
-	private Properties metaTags;
-
-	private StringBuffer htmlText;
-
-	/**
-	 * Creates a new instance.
-	 */
-	public DefaultHTMLParser() {
-		try {
-			tidy = new Tidy();
-			tidy.setXHTML(true);
-			tidy.setDropEmptyParas(true);
-			tidy.setDropFontTags(true);
-			tidy.setQuiet(true);
-			tidy.setShowWarnings(false);
-			tidy.setSmartIndent(false);
-			tidy.setTidyMark(false);
-			tidy.setWraplen(132);
-			tidy.setIndentAttributes(false);
-			tidy.setIndentContent(false);
-			tidy.setSpaces(2);
-			tidy.setCharEncoding(Configuration.ISO2022);
-//			tidy.setInputEncoding("UTF-8"); //$NON-NLS-1$
-//			tidy.setOutputEncoding("UTF-8"); //$NON-NLS-1$
-		} catch (Exception e) {
-			tidy = null;
-		}
-	}
-
-	/**
-	 * Parses the given HTML file.
-	 */
-	public void parse(File file) throws Exception {
-		if (tidy == null || !file.exists() || !file.canRead()) {
-			return;
-		}
-
-		FileInputStream fis = new FileInputStream(file);
-		InputStreamReader isr = new InputStreamReader(fis, "UTF-8"); //$NON-NLS-1$
-		BufferedReader br = new BufferedReader(isr);
-
-		StringBuffer textBuffer = new StringBuffer(BUFFER_SIZE);
-		char[] buffer = new char[BUFFER_SIZE];
-		int charsRead;
-		while ((charsRead = br.read(buffer, 0, BUFFER_SIZE)) > 0) {
-			textBuffer.append(buffer, 0, charsRead);
-		}
-
-		parse(textBuffer.toString());
-
-		if (br != null) {
-			try {
-				br.close();
-			} catch (IOException e) {
-			}
-		}
-	}
-
-	/**
-	 * Parses the given HTML source.
-	 */
-	protected void parse(String htmlSource) throws Exception {
-		title = ""; //$NON-NLS-1$
-		summary = ""; //$NON-NLS-1$
-		text = ""; //$NON-NLS-1$
-		metaTags = new Properties();
-
-		Document doc = getDocument(htmlSource);
-		if (doc != null) {
-			htmlText = new StringBuffer(1024);
-			extract(doc.getChildNodes());
-			text = htmlText.toString();
-		}
-	}
-
-	/**
-	 * Returns the title text.
-	 */
-	public String getTitle() {
-		return title;
-	}
-
-	/**
-	 * Returns the HTML meta tags.
-	 */
-	public Properties getMetaTags() {
-		return metaTags;
-	}
-
-	/**
-	 * Returns the summary.
-	 */
-	public String getSummary() {
-		return summary;
-	}
-
-	/**
-	 * Returns the body text.
-	 */
-	public String getText() {
-		return text;
-	}
-
-	/**
-	 * Returns the DOM document for the given HTML source.
-	 */
-	protected Document getDocument(String html) throws Exception {
-		if (html == null || html.length() == 0) {
-			return null;
-		}
-
-		ByteArrayInputStream input = new ByteArrayInputStream(html
-				.getBytes("UTF-8")); //$NON-NLS-1$	
-		ByteArrayOutputStream output = new ByteArrayOutputStream();
-
-		StringWriter sw = new StringWriter();
-		PrintWriter pw = new PrintWriter(sw);
-		tidy.setErrout(pw);
-
-		return tidy.parseDOM(input, output);
-	}
-
-	/**
-	 * Extracts the title, meta tags and body text from the given nodes.
-	 */
-	protected void extract(NodeList nodes) {
-		for (int i = 0; i < nodes.getLength(); i++) {
-			Node node = nodes.item(i);
-			String nodeName = node.getNodeName();
-			switch (node.getNodeType()) {
-			case Node.ELEMENT_NODE:
-				if (!nodeName.equals(HTML_SCRIPT_TAG)) {
-					NamedNodeMap attrs = node.getAttributes();
-					for (int j = 0; j < attrs.getLength(); j++) {
-						Node attrNode = attrs.item(j);
-						String attrNodeName = attrNode.getNodeName();
-						String attrNodeValue = attrNode.getNodeValue();
-						if (attrNodeName.equals(HTML_TITLE_TAG)) {
-							title = attrNodeValue;
-						} else if (attrNodeName.equals(HTML_META_TAG)) {
-							metaTags.put(attrNodeName, attrNodeValue);
-						}
-					}
-					NodeList childNodes = node.getChildNodes();
-					if (childNodes != null && childNodes.getLength() > 0) {
-						extract(childNodes);
-					}
-				}
-				break;
-			case Node.TEXT_NODE:
-				htmlText.append(node.getNodeValue()).append(' ');
-				break;
-			}
-		}
-	}
-
-}