diff options
Diffstat (limited to 'bundles/org.eclipse.wst.ws.parser/src/org/eclipse/wst/ws/internal/parser/wsil/HTMLHeadHandler.java')
-rw-r--r-- | bundles/org.eclipse.wst.ws.parser/src/org/eclipse/wst/ws/internal/parser/wsil/HTMLHeadHandler.java | 296 |
1 files changed, 0 insertions, 296 deletions
diff --git a/bundles/org.eclipse.wst.ws.parser/src/org/eclipse/wst/ws/internal/parser/wsil/HTMLHeadHandler.java b/bundles/org.eclipse.wst.ws.parser/src/org/eclipse/wst/ws/internal/parser/wsil/HTMLHeadHandler.java deleted file mode 100644 index e63c60df3..000000000 --- a/bundles/org.eclipse.wst.ws.parser/src/org/eclipse/wst/ws/internal/parser/wsil/HTMLHeadHandler.java +++ /dev/null @@ -1,296 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2001, 2006 IBM Corporation and others. - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Public License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/legal/epl-v10.html - * - * Contributors: - * IBM Corporation - initial API and implementation - * yyyymmdd bug Email and other contact information - * -------- -------- ----------------------------------------------------------- - * 20060517 142324 rsinha@ca.ibm.com - Rupam Kuehner - *******************************************************************************/ - -package org.eclipse.wst.ws.internal.parser.wsil; - -import java.io.UnsupportedEncodingException; -import java.util.Vector; - -import org.xml.sax.Attributes; -import org.xml.sax.SAXException; -import org.xml.sax.SAXParseException; -import org.xml.sax.helpers.DefaultHandler; - -public class HTMLHeadHandler extends DefaultHandler -{ - private final char START_TAG = '<'; - private final char END_TAG = '>'; - private final String HEAD_START_TAG = "<head>"; - private final String HEAD_END_TAG = "</head>"; - private final String ROOT_START_TAG = "<root>"; - private final String ROOT_END_TAG = "</root>"; - private final String UTF8 = "UTF-8"; - - //HTML META tag information used to detect the charset. - private final String HTML_CONTENT = "content"; - private final String HTTP_EQUIV = "http-equiv"; - private final String HTTP_EQUIV_CONTENT_TYPE = "Content-Type"; - private final String CHARSET = "charset"; - - // WSIL tag information. - private final String META = "meta"; - private final String NAME = "name"; - private final String SERVICE_INSPECTION = "serviceInspection"; - private final String CONTENT = "content"; - - // DISCO tag information. - private final String LINK = "link"; - private final String TYPE = "type"; - private final String TEXT_XML = "text/xml"; - private final String REL = "rel"; - private final String ALTERNATE = "alternate"; - private final String HREF = "href"; - - private String baseURI_; - private Vector wsils_; - private Vector discos_; - private String byteEncoding = UTF8; //Default to UTF-8. - - public HTMLHeadHandler(String baseURI) - { - super(); - baseURI_ = baseURI; - wsils_ = new Vector(); - discos_ = new Vector(); - } - - public String[] getWsils() - { - String[] wsils = new String[wsils_.size()]; - wsils_.copyInto(wsils); - return wsils; - } - - public String[] getDiscos() - { - String[] discos = new String[discos_.size()]; - discos_.copyInto(discos); - return discos; - } - - public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException - { - String qNameLC = qName.toLowerCase(); - if (qNameLC.equals(META)) - { - String nameValue = attributes.getValue(NAME); - if (SERVICE_INSPECTION.equals(nameValue)) - { - String wsilURI = attributes.getValue(CONTENT); - if (baseURI_ != null && wsilURI.indexOf(":/") == -1) - { - StringBuffer sb = new StringBuffer(); - sb.append(baseURI_.substring(0, baseURI_.lastIndexOf("/")+1)); - sb.append(wsilURI); - wsilURI = sb.toString(); - } - if (!wsils_.contains(wsilURI)) - wsils_.add(wsilURI); - } - } - else if (qNameLC.equals(LINK)) - { - // See http://msdn.microsoft.com/msdnmag/issues/02/02/xml/default.aspx for more details on DISCO. - String type = attributes.getValue(TYPE); - String rel = attributes.getValue(REL); - String href = attributes.getValue(HREF); - if (TEXT_XML.equals(type) && ALTERNATE.equals(rel) && href != null) - { - String discoURI = href; - if (discoURI.indexOf(":/") == -1) - { - StringBuffer sb = new StringBuffer(); - sb.append(baseURI_.substring(0,baseURI_.lastIndexOf("/")+1)); - sb.append(discoURI); - discoURI = sb.toString(); - } - if (!discos_.contains(discoURI)) - discos_.add(discoURI); - } - } - } - - public void error(SAXParseException e) throws SAXException - { - } - - public void fatalError(SAXParseException e) throws SAXException - { - } - - public void warning(SAXParseException e) throws SAXException - { - } - - /** - * Appends the elements of the provided tag in the provided document to the provided StringBuffer. - * @param target - * @param document - * @param tag - * @param encoding - * @return boolean false if the value of the encoding parameter matched the detected charset or if no charset was detected. - * Returns true if a charset was detected and it did not equal the encoding parameter. If true is returned - * the harvesting of the tags would have stopped at the point the charset was detected. The caller - * should call this method again with the correct encoding. - */ - private boolean harvestTags(StringBuffer target,String document,String tag, String encoding) - { - boolean changeEncoding = false; - int index = document.indexOf(START_TAG); - int documentLength = document.length(); - int tagLength = tag.length(); - while (index != -1 && (index+1+tagLength)<documentLength) - { - String str = document.substring(index+1,index+1+tagLength); - if (str.toLowerCase().equals(tag)) - { - str = document.substring(index,document.indexOf(END_TAG,index+1)+1); - target.append(str); - index += str.length(); - - //If tag is META and declares the charset, find out what it is - //and if it matches what was passed in. If it matches, continue - //with the parsing and return false when complete. - //If the detected charset is different from what was passed in, - //- change byteEncoding to equal the detected charset. - //- stop parsing. - //- return true. - if (tag.equals(META)) - { - int idxOfContent = str.indexOf(HTML_CONTENT); - int idxOfHTTPEQUIV = str.indexOf(HTTP_EQUIV); - if (idxOfHTTPEQUIV!= -1 && idxOfContent != -1) - { - //Check if the http-equiv attribute is set to Content-Type. - int idxOfHTTPEQUIVOpenQuote = str.indexOf("\"", idxOfHTTPEQUIV+1); - int idxOfHTTPEQUIVClosingQuote = str.indexOf("\"", idxOfHTTPEQUIVOpenQuote+1); - String hTTPEQUIVValueUntrimmed = str.substring(idxOfHTTPEQUIVOpenQuote+1, idxOfHTTPEQUIVClosingQuote); - if (hTTPEQUIVValueUntrimmed.trim().equals(HTTP_EQUIV_CONTENT_TYPE)) - { - //This META tag contains the charset. Get the value of the content attribute - int idxOfOpenQuote = str.indexOf("\"", idxOfContent+1); - int idxOfClosingQuote = str.indexOf("\"", idxOfOpenQuote+1); - String contentValue = str.substring(idxOfOpenQuote+1, idxOfClosingQuote); - - //Get the charset - int idxOfCharSet = contentValue.indexOf(CHARSET); - int idxOfEquals = contentValue.indexOf("=", idxOfCharSet+CHARSET.length()); - String detectedEncodingValueUntrimmed = contentValue.substring(idxOfEquals+1); - String detectedEncodingValue = detectedEncodingValueUntrimmed.trim(); - if (!detectedEncodingValue.equals(encoding)) - { - byteEncoding = detectedEncodingValue; - changeEncoding = true; - break; - } - } - } - } - } - else - index++; - index = document.indexOf(START_TAG,index); - } - - return changeEncoding; - } - - - /** - * If the provided byte array reperesents the contents of an HTML - * document, this method will return a byte array in which - * <ul> - * <li>the opening and closing HEAD tags are removed and replaced with - * opening and closing <root> tags</li> - * <li>only the META and LINK elements are in the HTML document - * are included in the contents between the opening and closing - * <root> tags. - * </ul> - * This method will modify the value of the byteEncoding String - * attribute on this class if it is something other than - * UTF-8. Callers of this method should call getByteEncoding() - * after calling this method if they need to know the charset - * value used by this method to decode/endcode the byte array. - * @param b - * @return byte[] - */ - public byte[] harvestHeadTags(byte[] b) - { - String s; - - try - { - //Assume the default byte encoding of UTF-8 for now. - s = new String(b, byteEncoding); - } - catch (UnsupportedEncodingException uee) - { - s = new String(b); - } - String head = s.toLowerCase(); - int headStartIndex = head.indexOf(HEAD_START_TAG); - int headEndIndex = head.indexOf(HEAD_END_TAG); - StringBuffer sb = new StringBuffer(); - sb.append(ROOT_START_TAG); - if (headStartIndex != -1 && headEndIndex != -1) - { - head = s.substring(headStartIndex, headEndIndex+HEAD_END_TAG.length()); - boolean encodingChanged = harvestTags(sb,head,META, byteEncoding); - if (encodingChanged) - { - //The harvestTags method detected a different charset - //than the one that was passed in. Start from the beginning - //with the correct charset. - String s2; - try - { - s2 = new String(b, byteEncoding); - } - catch (UnsupportedEncodingException uee) - { - s2 = new String(b); - } - String head2 = s2.toLowerCase(); - int head2StartIndex = head2.indexOf(HEAD_START_TAG); - int head2EndIndex = head2.indexOf(HEAD_END_TAG); - sb = new StringBuffer(); - sb.append(ROOT_START_TAG); - if (head2StartIndex != -1 && head2EndIndex != -1) - { - head2 = s2.substring(head2StartIndex, head2EndIndex+HEAD_END_TAG.length()); - harvestTags(sb,head2,META, byteEncoding); - harvestTags(sb,head2,LINK,byteEncoding); - } - } - else - { - harvestTags(sb,head,LINK,byteEncoding); - } - } - sb.append(ROOT_END_TAG); - try - { - return sb.toString().getBytes(byteEncoding); - } catch (UnsupportedEncodingException uee) - { - return sb.toString().getBytes(); - } - - } - - public String getByteEncoding() - { - return byteEncoding; - } -} |