diff options
Diffstat (limited to 'bundles/org.eclipse.equinox.ds/src/org/eclipse/equinox/internal/util/xml/XMLReader.java')
-rw-r--r-- | bundles/org.eclipse.equinox.ds/src/org/eclipse/equinox/internal/util/xml/XMLReader.java | 1329 |
1 files changed, 0 insertions, 1329 deletions
/*******************************************************************************
 * Copyright (c) 1997, 2008 by ProSyst Software GmbH
 * http://www.prosyst.com
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *    ProSyst Software GmbH - initial API and implementation
 *******************************************************************************/
package org.eclipse.equinox.internal.util.xml;

import java.io.*;
import java.util.Locale;
import org.eclipse.equinox.internal.ds.Activator;
import org.eclipse.equinox.internal.util.string.CharBuffer;

/**
 * <p>
 * A small recursive-descent XML reader. It builds a tree of {@link TagClass}
 * objects and hands every completed tag to a {@link TagListener}. If a closing
 * tag does not match the currently open tag an <code>IOException</code>
 * carrying the line and position of the problem is thrown.
 * </p>
 * <p>
 * When the boolean property <code>equinox.ds.xml.debug</code> is set, parsing
 * failures and listener failures are additionally dumped to
 * <code>System.err</code>.
 * </p>
 *
 * Known limitations:<br>
 *
 * <pre>
 * The following constructs are recognised but not interpreted:
 *   1. &lt;?TAG_NAME ..... ?&gt; (processing instructions)
 *   2. &lt;!DOCTYPE .... &gt;
 *   3. &lt;!ELEMENT .... &gt;
 *   4. &lt;!ATTLIST .... &gt;
 *   5. &lt;!ENTITY .... &gt;
 * </pre>
 *
 * The parser skips these constructs: it searches for the terminating
 * '&gt;' symbol and closes the 'special' tag there. Numeric character
 * references (<code>&amp;#...;</code>) are consumed but not decoded.<br>
 *
 * @author Ivan Dimitrov
 * @author Pavlin Dobrev
 * @version 1.0
 */
public class XMLReader {

	/* Names of the boolean properties read through Activator.getBoolean(). */
	private static final String DEBUG = "equinox.ds.xml.debug";
	private static final String SET_OLD_BEHAVIOUR = "equinox.ds.xml.oldbehaviour";
	private static final String SET_OLD_LEVELS = "equinox.ds.xml.oldlevels";
	private static final String INTERN_ATTRIBUTES = "equinox.ds.xml.intern.attributes";

	/* Keywords recognised by the parser. */
	private static final String CDATA = "CDATA";
	private static final String XML = "xml";
	private static final String VERSION = "version";
	private static final String ENCODING = "encoding";
	private static final String STANDALONE = "standalone";

	/* Error messages; err() prefixes them with the line and position. */
	private static final String ERR_EOS = "End-of-stream reached before the end of XML.";
	private static final String ERR_ENTITY_EXPECTED = "Entity reference or Character reference expected.";
	private static final String ERR_EQUAL_EXPECTED = "'=' expected.";
	private static final String ERR_QUOT_EXPECTED = "''' or '\"' expected.";
	private static final String ERR_GT_EXPECTED = "'>' expected.";
	private static final String ERR_LT_EXPECTED = "'<' expected.";
	private static final String ERR_CLOSE_TAG1_EXPECTED = "'/' or tag name expected.";
	private static final String ERR_CLOSE_TAG2_EXPECTED = "'>', '/>' or more attributes expected.";
	private static final String ERR_CLOSE_TAG3_EXPECTED = "'?>' expected.";
	private static final String ERR_CONTENT_EXPECTED = "Content data, new tag or closing tag expected.";
	private static final String ERR_QUESTIONMARK_EXPECTED = "'?' expected.";
	private static final String ERR_ILLEGAL_CHARACTER = "Illegal character.";
	private static final String ERR_TAGNAME_EXPECTED = "Tag name expected.";
	private static final String ERR_TAGNAME2_EXPECTED = "Tag name, '?' or '!' expected.";
	private static final String ERR_DASH_EXPECTED = "'-' expected.";
	private static final String ERR_COMMENT_CLOSE_EXPECTED = "'-->' expected.";
	private static final String ERR_CDATA_EXPECTED = "'CDATA' expected.";
	private static final String ERR_OPENSQBRACKET_EXPECTED = "'[' expected.";
	private static final String ERR_CLOSE_CDATA_EXPECTED = "']]>' expected.";
	private static final String ERR_SEMICOLON_EXPECTED = "';' expected.";
	private static final String ERR_XMLPROLOG_EXPECTED = "XML prolog '<?xml' is not expected at this position.";
	private static final String ERR_VERSION_EXPECTED = "'version' attribute expected.";
	private static final String ERR_ENCODING_STANDALONE_EXPECTED = "'encoding', 'standalone' or '?>' expected.";
	private static final String ERR_STANDALONE_EXPECTED = "'standalone' attribute expected.";

	private static final boolean fDebug = Activator.getBoolean(DEBUG);
	private static final boolean fOldBehaviour = Activator.getBoolean(SET_OLD_BEHAVIOUR);
	private static final boolean fOldLevels = Activator.getBoolean(SET_OLD_LEVELS);
	private static final boolean fInternAttributes = Activator.getBoolean(INTERN_ATTRIBUTES);

	/** Encoding used for stream input when the document has no XML prolog. */
	private String fDefaultEncoding = "UTF-8";

	/* Scratch buffers handed out by getCharBuffer(). */
	private CharBuffer temp = new CharBuffer();
	private CharBuffer temp2 = null;

	/** Character source; stays null for stream input until setEncoding() runs. */
	protected Reader fReader = null;
	/** Raw byte source used while fReader is still null. */
	protected InputStream fStream = null;

	/** The character the parser is currently looking at; 0 means "no more input". */
	protected char currentChar = 0;
	protected TagListener fTagListener;

	/** Current line (1-based) and position within the line, used in error messages. */
	protected int fLine = 1;
	protected int fPos = 0;

	/** Level filter for listener notification, see notifyListeners(). */
	protected int fLevel = -1;
	/** Nesting level of the tag currently being parsed. */
	protected int fCurrentLevel = 1;

	private String fVersion = "1.0";
	private String fEncoding = "UTF-8";
	private String fStandalone = "no";

	/* Entity names and their replacement characters (index-aligned). */
	protected static final String[] fNew_entities = {"amp", "apos", "lt", "gt", "quot"};
	protected static final char[] fNew_ent_chars = {'&', '\'', '<', '>', '"'};

	protected static final String[] fOld_entities = {"amp", "nbsp", "crlf", "tab", "lt", "gt", "quot", "apos"};
	protected static final char[] fOld_ent_chars = {'&', ' ', '\n', '\t', '<', '>', '"', '\''};

	/**
	 * An empty default constructor. Neither an input source nor a listener is
	 * set by it.
	 */
	public XMLReader() {
		//
	}

	/**
	 * Constructs a new XMLReader that reads from a byte stream. <br>
	 * <br>
	 * <b>Note:</b> the XMLReader does not close the passed InputStream.
	 *
	 * @param aInputStream
	 *            an InputStream to read the XML file from
	 * @param aListener
	 *            TagListener that will be notified on tag close event
	 */
	public XMLReader(InputStream aInputStream, TagListener aListener) {
		fStream = aInputStream;
		fTagListener = aListener;
	}

	/**
	 * Constructs a new XMLReader that reads from a character reader. <br>
	 * <br>
	 * <b>Note:</b> the XMLReader does not close the passed Reader.
	 *
	 * @param aReader
	 *            a reader that will be used to read the XML file from
	 * @param aListener
	 *            TagListener that will be notified on tag close event
	 */
	public XMLReader(Reader aReader, TagListener aListener) {
		fReader = aReader;
		fTagListener = aListener;
	}

	/**
	 * Parses the XML document supplied by aInputStream and notifies aListener
	 * for close-tag events. Equivalent to
	 * <code>parseXML(aInputStream, aListener, -1)</code>.<br>
	 * <br>
	 * <b>Note:</b> the XMLReader does not close the passed InputStream.
	 *
	 * @param aInputStream
	 *            an InputStream to read the XML file from
	 * @param aListener
	 *            TagListener that will be notified on close-tag events
	 * @throws IOException
	 *             on read failure or on a parse error
	 */
	public static void read(InputStream aInputStream, TagListener aListener) throws IOException {
		parseXML(aInputStream, aListener, -1);
	}

	/**
	 * Parses the XML document supplied by aInputStream and notifies aListener
	 * for close-tag events on the requested level.<br>
	 * <br>
	 * <b>Note:</b> the XMLReader does not close the passed InputStream.
	 *
	 * @param aInputStream
	 *            an InputStream to read the XML file from
	 * @param aListener
	 *            TagListener that will be notified on close-tag events
	 * @param aLevel
	 *            see {@link #parseXML(Reader, TagListener, int)}
	 * @throws IOException
	 *             on read failure or on a parse error
	 */
	public static void read(InputStream aInputStream, TagListener aListener, int aLevel) throws IOException {
		parseXML(aInputStream, aListener, aLevel);
	}

	/**
	 * Parses the XML document supplied by aReader and notifies aListener for
	 * close-tag events. Equivalent to
	 * <code>parseXML(aReader, aListener, -1)</code>.<br>
	 * <br>
	 * <b>Note:</b> the XMLReader does not close the passed Reader.
	 *
	 * @param aReader
	 *            a Reader to read the XML file from
	 * @param aListener
	 *            TagListener that will be notified on close-tag events
	 * @throws IOException
	 *             on read failure or on a parse error
	 */
	public static void read(Reader aReader, TagListener aListener) throws IOException {
		parseXML(aReader, aListener, -1);
	}

	/**
	 * Parses the XML document supplied by aReader and notifies aListener for
	 * close-tag events on the requested level.<br>
	 * <br>
	 * <b>Note:</b> the XMLReader does not close the passed Reader.
	 *
	 * @param aReader
	 *            a Reader to read the XML file from
	 * @param aListener
	 *            TagListener that will be notified on close-tag events
	 * @param aLevel
	 *            see {@link #parseXML(Reader, TagListener, int)}
	 * @throws IOException
	 *             on read failure or on a parse error
	 */
	public static void read(Reader aReader, TagListener aListener, int aLevel) throws IOException {
		parseXML(aReader, aListener, aLevel);
	}

	/**
	 * Wraps the input stream into a Reader using the given encoding. If a
	 * Reader is already associated with the parser the method returns
	 * immediately. An unsupported (or otherwise failing) encoding is not
	 * reported to the caller: the stream is wrapped with the platform default
	 * encoding instead.
	 *
	 * @param aEncoding
	 *            the encoding to decode the stream with
	 */
	protected void setEncoding(String aEncoding) {
		if (fReader != null) {
			return;
		}

		try {
			fReader = new InputStreamReader(fStream, aEncoding);
		} catch (Exception e) {
			if (fDebug) {
				System.err.println("[XMLReader] Failed setting the encoding \"" + aEncoding + "\", continue parsing with the default one.");
			}
			// Deliberate best effort: fall back to the platform default charset.
			fReader = new InputStreamReader(fStream);
		}
	}

	/**
	 * Sets the tag level that the listener is notified for. For internal use
	 * only.
	 *
	 * @param aLevel
	 *            the level filter, see notifyListeners()
	 */
	protected void setLevel(int aLevel) {
		fLevel = aLevel;
	}

	/**
	 * Returns an empty CharBuffer, reusing one of the two cached buffers when
	 * it is currently empty and allocating a fresh one otherwise. Callers
	 * release a cached buffer by resetting its length to 0.
	 */
	protected CharBuffer getCharBuffer() {
		if (temp.length() <= 0) {
			return temp;
		}

		if (temp2 == null) {
			temp2 = new CharBuffer(0);
			return temp2;
		}

		if (temp2.length() <= 0) {
			return temp2;
		}

		// Both cached buffers are in use.
		return new CharBuffer(0);
	}

	/** The character that was current before the last getNextChar() call. */
	protected char prev_char = 0;
	/** Read-ahead buffer used once a Reader is available. */
	protected char[] fBuffer = new char[4096];
	/** Number of valid chars in fBuffer; -1 once the reader is exhausted. */
	protected int fBufferLen = 0;
	protected int fBufferPos = 0;

	/**
	 * Reads the next char from the input and stores it in
	 * <code>currentChar</code>, keeping <code>fLine</code>/<code>fPos</code>
	 * up to date. While no Reader is associated (the encoding is not known
	 * yet) single bytes are read from the InputStream and widened to chars;
	 * afterwards chars are served from a buffer filled from the Reader.
	 *
	 * @return true if the next char is successfully read or false if
	 *         End-Of-Stream is reached (currentChar is 0 in that case)
	 * @throws IOException
	 *             if some error occurs during reading the character or if the
	 *             caller keeps reading beyond the End-Of-Stream.
	 */
	protected boolean getNextChar() throws IOException {
		char ch;

		if (fReader == null) {
			// No reader yet: read raw bytes until the right encoding is recognized.
			int ach = fStream.read();
			if (ach < 0) {
				ach = 0;
			}

			ch = (char) ach;
			if (ch == 0 && prev_char == 0) {
				throw new IOException(ERR_EOS);
			}
		} else {
			if (fBufferLen < 0) {
				throw new IOException(ERR_EOS);
			}

			if (fBufferPos < fBufferLen) {
				ch = fBuffer[fBufferPos++];
			} else {
				// Refetch the buffer; retry a bounded number of times on empty reads.
				fBufferLen = 0;
				fBufferPos = 0;
				int attempts = 0;
				while (fBufferLen == 0 && attempts < 100) {
					fBufferLen = fReader.read(fBuffer);
					attempts++;
				}

				ch = (fBufferLen > 0) ? fBuffer[fBufferPos++] : 0;

				if (fBufferLen == 0) {
					fBufferLen = -1;
				}
			}
		}

		prev_char = currentChar;
		currentChar = ch;
		fPos++;

		switch (ch) {
			case '\n' :
				// "\r\n" was already counted when the '\r' was read.
				if (prev_char != '\r') {
					fLine++;
				}
				fPos = 0;
				break;
			case '\r' :
				fPos = 0;
				fLine++;
				break;
		}
		return (currentChar != 0);
	}

	/**
	 * Parses an attribute value up to (not including) the closing quote and
	 * appends it to the CharBuffer. Parsing also stops at '&lt;' and at the
	 * end of the input. Entity references are appended unresolved (in their
	 * <code>&amp;name;</code> form); character references are consumed by
	 * parse_CharRef().<br>
	 * attr_value ::= (acceptable_char | EntityRef | CharRef)* - quot_symbol
	 *
	 * @param sb
	 *            the buffer the value is appended to
	 * @param quot
	 *            the quote character that terminates the value
	 * @throws IOException
	 *             on read failure or if '&amp;' is not followed by a reference
	 */
	protected void parse_attr_value(CharBuffer sb, char quot) throws IOException {
		while (currentChar != quot && currentChar != '<') {
			if (accept_char('&')) {
				if (!parse_CharRef(sb) && !parse_EntityRef(sb, true)) {
					err(fPos - 1, ERR_ENTITY_EXPECTED);
				}
				continue;
			}

			sb.append(currentChar);
			if (!getNextChar()) {
				break;
			}
		}
	}

	/**
	 * Parses one attribute and appends it in the form
	 * <code> name="value"</code> (leading space, original quote character) to
	 * the buffer. Grammar:<br>
	 *
	 * <pre>
	 * attribute  ::= S* + attr_name + S* + '=' + S* + ('\'' + (attr_value - '\'') + '\'')) | ('&quot;' + (attr_value - '&quot;') + '&quot;'))
	 * attr_value ::= (acceptable_char | EntityRef | CharRef)*
	 * attr_name  ::= identifier
	 * </pre>
	 *
	 * Note that the separating space is appended even when no attribute
	 * follows.
	 *
	 * @param cb
	 *            the buffer collecting the raw attribute list of the tag
	 * @return true if an attribute was parsed, false if no attribute name
	 *         starts at the current position
	 * @throws IOException
	 *             on read failure or on a malformed attribute
	 */
	protected boolean parse_attr(CharBuffer cb) throws IOException {
		clearWhiteSpaces();

		cb.append(' ');
		if (parse_identifier(cb) <= 0) {
			return false;
		}

		clearWhiteSpaces();
		if (!accept_char('=')) {
			err(ERR_EQUAL_EXPECTED);
		}
		cb.append('=');
		clearWhiteSpaces();

		char quot = 0;
		if (accept_char('"')) {
			quot = '"';
		} else if (accept_char('\'')) {
			quot = '\'';
		} else {
			err(ERR_QUOT_EXPECTED);
		}

		cb.append(quot);
		parse_attr_value(cb, quot);

		if (!accept_char(quot)) {
			err("'" + quot + "' expected.");
		}

		cb.append(quot);
		return true;
	}

	/**
	 * Parses the attribute list of a tag and stores it, prefixed with the tag
	 * name, as one raw string in <code>aParent.fAttributes</code>.
	 *
	 * <pre>
	 * attr_list ::= attribute*
	 * </pre>
	 *
	 * @param aParent
	 *            the tag that the parsed attributes belong to
	 * @return true if at least one attribute is parsed and false otherwise
	 * @throws IOException
	 *             on read failure or on a malformed attribute
	 */
	protected boolean parse_attr_list(TagClass aParent) throws IOException {
		boolean foundAny = false;

		CharBuffer cb = getCharBuffer();
		cb.append(aParent.getName());
		while (parse_attr(cb)) {
			foundAny = true;
		}

		String attributes = cb.toString();
		aParent.fAttributes = fInternAttributes ? attributes.intern() : attributes;
		cb.setLength(0); // hand the scratch buffer back
		return foundAny;
	}

	/* Exclusive bounds of the ASCII letter and digit ranges. */
	private static final char bA = 'A' - 1;
	private static final char aZ = 'Z' + 1;
	private static final char ba = 'a' - 1;
	private static final char az = 'z' + 1;
	private static final char b0 = '0' - 1;
	private static final char a9 = '9' + 1;

	/**
	 * Tells whether the passed character may start a tag name or an attribute
	 * name. The ranges follow the NameStartChar production of XML 1.0 as far
	 * as it can be expressed with a single 16-bit char (supplementary code
	 * points are not covered).
	 *
	 * @param ch
	 *            the tested character
	 * @return true if the character could be used as starting character for a
	 *         tag name and an attribute name and false otherwise
	 */
	public final static boolean isNameStartChar(char ch) {
		return (ch > bA && ch < aZ) //
				|| (ch > ba && ch < az) //
				|| (ch == ':') //
				|| (ch == '_') //
				|| (ch > 0xBF && ch < 0xD7) //
				|| (ch > 0xD7 && ch < 0xF7) //
				|| (ch > 0xF7 && ch < 0x300) //
				|| (ch > 0x36F && ch < 0x37E) //
				|| (ch > 0x37E && ch < 0x2000) //
				|| (ch > 0x200B && ch < 0x200E) //
				|| (ch > 0x206F && ch < 0x2190) //
				|| (ch > 0x2BFF && ch < 0x2FF0) //
				|| (ch > 0x3000 && ch < 0xD800) //
				|| (ch > 0xF8FF && ch < 0xFDD0) // U+F900 itself is a valid start char
				|| (ch > 0xFDEF && ch < 0xFFFE);
	}

	/**
	 * Tells whether the passed character may be used inside a tag name or an
	 * attribute name.
	 *
	 * @param ch
	 *            the tested character
	 * @return true if the character could be used as part of a tag name or an
	 *         attribute name and false otherwise
	 */
	public final static boolean isNameChar(char ch) {
		return (ch == '-') //
				|| (ch == '.') //
				|| (ch == 0xB7) //
				|| (ch > b0 && ch < a9) //
				|| isNameStartChar(ch) //
				|| (ch > 0x02FF && ch < 0x0370) //
				|| (ch > 0x203E && ch < 0x2041);
	}

	/**
	 * Parses an identifier starting at the current character.
	 *
	 * @return the identifier as an interned string if one starts at the
	 *         current position and null otherwise
	 * @throws IOException
	 *             if an exception occurs during read operations from the Reader
	 *             or the InputStream
	 */
	protected String parse_identifier() throws IOException {
		if (!isNameStartChar(currentChar)) {
			return null;
		}

		CharBuffer sb = getCharBuffer();
		while (isNameChar(currentChar)) {
			sb.append(currentChar);
			if (!getNextChar()) {
				break;
			}
		}

		String identifier = sb.toString().intern();
		sb.setLength(0); // hand the scratch buffer back
		return identifier;
	}

	/**
	 * Parses an identifier and appends it to the passed CharBuffer.
	 *
	 * @param cb
	 *            CharBuffer that the parsed identifier is appended to
	 * @return the length of the parsed identifier, 0 if none starts at the
	 *         current position
	 * @throws IOException
	 *             if an exception occurs during read operations from the Reader
	 *             or the InputStream
	 */
	protected int parse_identifier(CharBuffer cb) throws IOException {
		if (!isNameStartChar(currentChar)) {
			return 0;
		}

		int length = 0;
		while (isNameChar(currentChar)) {
			cb.append(currentChar);
			length++;
			if (!getNextChar()) {
				break;
			}
		}
		return length;
	}

	/**
	 * Parses a tag name and, if one is found, sets it as the name of the
	 * passed tag.
	 *
	 * @param aParent
	 *            the tag that receives the name
	 * @return true if the name is parsed successfully and false otherwise
	 * @throws IOException
	 *             if an exception occurs during read operations
	 */
	protected boolean parse_tag_name(TagClass aParent) throws IOException {
		String name = parse_identifier();
		if (name == null) {
			return false;
		}
		aParent.setName(name);
		return true;
	}

	/**
	 * Passes a completed tag to the listener. The listener is invoked when the
	 * configured level is less than or equal to 0 or equals the level of the
	 * tag being parsed. A RuntimeException thrown by the listener is rethrown
	 * (after being dumped to System.err in debug mode).
	 *
	 * @param aTag
	 *            the tag that the notification is sent for
	 */
	protected void notifyListeners(TagClass aTag) {
		try {
			if (fLevel <= 0 || fLevel == fCurrentLevel) {
				fTagListener.useTag(aTag);
			}
		} catch (RuntimeException re) {
			if (fDebug) {
				System.err.println("An outside exception occurred while processing a tag on line " + aTag.getLine() + ", the tag name is: " + aTag.getName() + ", the level is: " + fCurrentLevel);
				re.printStackTrace(System.err);
			}
			throw re;
		}
	}

	/**
	 * Parses a normal tag. There are two cases - (1) the tag has separate open
	 * and close tag elements and (2) the tag is an inline one such as
	 * <code>&lt;tag_name ... /&gt;</code>. On success the tag is added to
	 * aParent and the listener is notified (for inline tags only when the old
	 * behaviour is not enabled).
	 *
	 * @param aParent
	 *            the parent tag that this tag will be added to if the parsing
	 *            is successful
	 * @return true on success, false if no tag name starts at the current
	 *         position
	 * @throws IOException
	 *             on read failure or on a parse error
	 */
	protected boolean parse_tag_normal(TagClass aParent) throws IOException {
		// Looking for a tag_name (identifier)
		if (!isNameStartChar(currentChar)) {
			return false;
		}

		TagClass tag = new TagClass();
		tag.setLine(fLine);

		parse_tag_name(tag);
		parse_attr_list(tag);

		clearWhiteSpaces();

		if (accept_char('/')) {
			// Inline tag: <name ... />
			if (!accept_char('>')) {
				err(ERR_GT_EXPECTED);
			}
			tag.setInline();
			aParent.addTag(tag);

			if (!fOldBehaviour) {
				notifyListeners(tag);
			}
			return true;
		}

		if (!accept_char('>')) {
			err(ERR_CLOSE_TAG2_EXPECTED);
		}

		// Body of the tag: child tags, special tags and character data.
		while (true) {
			clearWhiteSpaces();
			int pos = fPos;

			if (currentChar != '<') {
				if (!parse_PCDATA(tag)) {
					break;
				}
				continue;
			}

			if (parse_tag(tag)) {
				// A nested normal or special tag was consumed.
				continue;
			}

			// Not a nested tag, so it must be the closing tag: '/' + tag_name + S* + '>'
			if (!accept_char('/')) {
				err(pos + 1, ERR_CLOSE_TAG1_EXPECTED);
			}

			pos = fPos;
			if (!accept_seq(tag.getName())) {
				err(pos, '\'' + tag.getName() + "' string expected.");
			}

			clearWhiteSpaces();
			if (!accept_char('>')) {
				err(ERR_GT_EXPECTED);
			}

			aParent.addTag(tag);
			notifyListeners(tag);
			return true;
		}

		err(ERR_CONTENT_EXPECTED);
		return false;
	}

	/**
	 * Parses special tags, i.e. those that begin with:<br>
	 *
	 * <pre>
	 * &lt;!--          comments
	 * &lt;!tag_name    declarations such as DOCTYPE (skipped up to the next '&gt;')
	 * &lt;![           CDATA element
	 * &lt;?            processing instructions (skipped up to '?&gt;')
	 * </pre>
	 *
	 * Only CDATA content is added to aParent; everything else is discarded.
	 *
	 * @param aParent
	 *            the parent tag that CDATA content is appended to
	 * @return true on success and false otherwise
	 * @throws IOException
	 *             on read failure or on a parse error
	 */
	protected boolean parse_tag_special(TagClass aParent) throws IOException {
		if (accept_char('!')) {
			TagClass tag = new TagClass();

			if (parse_tag_name(tag)) {
				clearWhiteSpaces();

				// Skip the declaration; getNextChar() throws at end of input.
				while (true) {
					if (accept_char('>')) {
						clearWhiteSpaces();
						return true;
					}
					getNextChar();
				}
			} else if (parse_tag_CDATA(aParent)) { // parse CDATA tag
				return true;
			} else if (parse_comment(tag)) {
				return true;
			}
		} else if (accept_char('?')) {
			TagClass tag = new TagClass();

			int pos = fPos;
			if (parse_tag_name(tag)) {
				if (tag.getName().equals(XML)) {
					err(pos - 2, ERR_XMLPROLOG_EXPECTED);
				}

				// Skip the processing instruction up to and including "?>".
				char prevCh = 0;
				while (true) {
					if (currentChar == '>' && prevCh == '?') {
						accept_char('>');
						clearWhiteSpaces();
						return true;
					}
					prevCh = currentChar;
					getNextChar();
				}
			}
			err(pos, ERR_TAGNAME_EXPECTED);
		}
		return false;
	}

	/**
	 * Parses <code>S* '=' S* quoted_value S*</code> and returns the value. A
	 * missing '=' is tolerated.
	 *
	 * @return the parsed attribute value as a string.
	 * @throws IOException
	 *             if an exception occurs during read operations from the Reader
	 *             or the InputStream, or if the value is not quoted properly
	 */
	protected String getAttrValue() throws IOException {
		CharBuffer cb = getCharBuffer();

		clearWhiteSpaces();
		accept_char('=');
		clearWhiteSpaces();

		if (currentChar != '\'' && currentChar != '"') {
			err(ERR_QUOT_EXPECTED);
		}

		char quot = currentChar;
		accept_char(quot);
		parse_attr_value(cb, quot);

		if (!accept_char(quot)) {
			err("'" + quot + "' expected.");
		}

		String value = cb.toString();
		cb.setLength(0); // hand the scratch buffer back
		clearWhiteSpaces();
		return value;
	}

	/**
	 * Parses the XML prolog tag, i.e.<br>
	 * <code>&lt;?xml version="..." encoding="..." standalone="..." ?&gt;</code><br>
	 * and records the version, encoding and standalone values. Any other
	 * processing instruction found at this position is skipped.
	 *
	 * @param parent
	 *            the parent tag (in this case this is the root "fake" tag,
	 *            which the listeners will never be informed for...)
	 * @return true if a prolog or processing instruction was consumed, false
	 *         otherwise
	 * @throws IOException
	 *             if an exception occurs during read operations from the Reader
	 *             or the InputStream, or if the prolog is malformed
	 */
	protected boolean parse_xml_prolog(TagClass parent) throws IOException {
		if (!accept_char('?')) {
			return false;
		}

		TagClass tag = new TagClass();
		if (!parse_tag_name(tag)) {
			return false;
		}

		if (tag.getName().equalsIgnoreCase(XML)) {
			if (fOldLevels) {
				fCurrentLevel++;
			}

			clearWhiteSpaces();

			int pos = fPos;
			String s = parse_identifier();

			boolean bEncoding = false;
			boolean bStandalone = false;

			if (VERSION.equals(s)) {
				fVersion = getAttrValue();
				s = parse_identifier();
			} else {
				err(pos, ERR_VERSION_EXPECTED);
			}

			if (ENCODING.equals(s)) {
				// Locale-independent upper-casing: the default locale must not
				// alter charset names (e.g. the Turkish dotted capital I).
				fEncoding = getAttrValue().toUpperCase(Locale.ENGLISH);
				s = parse_identifier();
				bEncoding = true;
			}

			if (STANDALONE.equals(s)) {
				fStandalone = getAttrValue();
				s = parse_identifier();
				bStandalone = true;
			}

			if (s != null) {
				// An unexpected identifier; report what could have followed.
				if (bEncoding && bStandalone) {
					err(ERR_CLOSE_TAG3_EXPECTED);
				}

				if (!bEncoding && !bStandalone) {
					err(ERR_ENCODING_STANDALONE_EXPECTED);
				}

				if (bEncoding) {
					err(ERR_STANDALONE_EXPECTED);
				}
				err(ERR_CLOSE_TAG3_EXPECTED);
			}

			clearWhiteSpaces();
			pos = fPos;
			if (!accept_seq("?>")) {
				err(pos, ERR_CLOSE_TAG3_EXPECTED);
			}
			return true;
		}

		// Some other processing instruction: skip it up to and including "?>".
		char prevCh = 0;
		while (true) {
			if (currentChar == '>') {
				if (prevCh == '?') {
					accept_char('>');
					clearWhiteSpaces();
					return true;
				}
				err(ERR_QUESTIONMARK_EXPECTED);
			} else if (currentChar == '<') {
				err(ERR_ILLEGAL_CHARACTER + " ('<')");
			}
			prevCh = currentChar;
			getNextChar();
		}
	}

	/**
	 * Parses (and discards) a comment. It is entered after "&lt;!" has been
	 * consumed. The grammar is:<br>
	 * Comment ::= '&lt;!--' ((Char - '-') | ('-' (Char - '-')))* '--&gt;'<br>
	 * Note that the grammar does not allow "--" inside a comment, so a comment
	 * ending in "---&gt;" is rejected.
	 *
	 * @param aParent
	 *            the parent tag (not modified)
	 * @return true on success, false if the current character is not '-'
	 * @throws IOException
	 *             on read failure or on a malformed comment
	 */
	protected boolean parse_comment(TagClass aParent) throws IOException {
		if (!accept_char('-')) {
			return false;
		}

		if (!accept_char('-')) {
			err(ERR_DASH_EXPECTED);
		}

		while (true) {
			if (accept_char('-') && accept_char('-')) {
				if (accept_char('>')) {
					return true;
				}
				err(ERR_GT_EXPECTED);
			}

			if (!getNextChar()) {
				err(ERR_COMMENT_CLOSE_EXPECTED);
			}
		}
	}

	/**
	 * Parses either a normal or a special tag. The current level is raised
	 * for the duration of the call.
	 *
	 * @param aParent
	 *            the parent tag that the successfully parsed tag will (if it is
	 *            normal tag or CDATA element) be added to
	 * @return true on success and false otherwise
	 * @throws IOException
	 *             on read failure or on a parse error
	 */
	protected boolean parse_tag(TagClass aParent) throws IOException {
		clearWhiteSpaces();
		fCurrentLevel++;
		try {
			return accept_char('<') && (parse_tag_normal(aParent) || parse_tag_special(aParent));
		} finally {
			fCurrentLevel--;
		}
	}

	/**
	 * Parses the content of the tag (including sub-tags and sub-elements)
	 *
	 * @param aParent
	 *            the parent tag that the content and tags will be added to
	 * @return true on success and false otherwise
	 * @throws IOException
	 *             on read failure or on a parse error
	 */
	protected boolean parse_content(TagClass aParent) throws IOException {
		return (parse_PCDATA(aParent) || parse_tag(aParent));
	}

	/**
	 * Parses a CDATA section and appends its text to the content buffer of
	 * aParent. It is entered after "&lt;!" has been consumed.
	 *
	 * @param aParent
	 *            the parent tag that the content will be added to
	 * @return true on success, false if the current character is not '['
	 * @throws IOException
	 *             on read failure or on a malformed CDATA section
	 */
	protected boolean parse_tag_CDATA(TagClass aParent) throws IOException {
		if (!accept_char('[')) {
			return false;
		}

		int pos = fPos;
		if (!accept_seq(CDATA)) {
			err(pos, ERR_CDATA_EXPECTED);
		}

		if (!accept_char('[')) {
			err(ERR_OPENSQBRACKET_EXPECTED);
		}

		do {
			if (currentChar != '>') {
				aParent.getContentBuffer().append(currentChar);
				continue;
			}

			// A '>' terminates the section only when it is preceded by "]]".
			CharBuffer sb = aParent.getContentBuffer();
			int l = sb.length();
			if (l >= 2 && sb.charAt(l - 1) == ']' && sb.charAt(l - 2) == ']') {
				sb.setLength(l - 2); // Truncates the extra "]]" symbols appended at the end
				getNextChar();
				return true;
			}
			sb.append(currentChar);
		} while (getNextChar());

		err(fPos - 1, ERR_CLOSE_CDATA_EXPECTED);
		return false;
	}

	/**
	 * Parses PCDATA content (Parseable Content DATA) up to the next '&lt;' or
	 * the end of the input and appends it to the content buffer of aParent.
	 * Known entity references are replaced by their character; character
	 * references are consumed by parse_CharRef().
	 *
	 * @param aParent
	 *            the parent tag that the PCDATA will be added to
	 * @return true if at least one character was consumed and false otherwise
	 * @throws IOException
	 *             on read failure or if '&amp;' is not followed by a reference
	 */
	protected boolean parse_PCDATA(TagClass aParent) throws IOException {
		boolean consumed = false;
		while (currentChar != '<') {
			consumed = true;

			CharBuffer sbContent = aParent.getContentBuffer();

			if (accept_char('&')) {
				int pos = fPos;
				if (!parse_CharRef(sbContent) && !parse_EntityRef(sbContent, false)) {
					err(pos - 1, ERR_ENTITY_EXPECTED);
				}
				continue;
			}

			sbContent.append(currentChar);
			if (!getNextChar()) {
				break;
			}
		}
		return consumed;
	}

	/**
	 * Accepts one character from the input stream and if it's successful moves
	 * one character forward.
	 *
	 * @param ch
	 *            the character that should be accepted
	 * @return true if the current character equals ch (it is consumed), false
	 *         otherwise (nothing is consumed)
	 * @throws IOException
	 *             if reading the following character fails
	 */
	protected boolean accept_char(char ch) throws IOException {
		if (currentChar != ch) {
			return false;
		}
		getNextChar();
		return true;
	}

	/**
	 * Accepts a sequence of characters given by seq parameter. If the sequence
	 * is accepted successfully then the currentChar field will contain the
	 * character immediately after the accepted sequence. On a mismatch the
	 * characters matched so far remain consumed.
	 *
	 * @param seq
	 *            the character sequence that should be accepted
	 * @return true on success and false otherwise
	 * @throws IOException
	 *             if reading from the input fails
	 */
	protected boolean accept_seq(String seq) throws IOException {
		int length = seq.length();
		for (int i = 0; i < length; i++) {
			if (!accept_char(seq.charAt(i))) {
				return false;
			}
		}
		return true;
	}

	/* The entity table in effect, selected once by the "old behaviour" switch. */
	private static final String[] fEntities = (fOldBehaviour) ? fOld_entities : fNew_entities;
	private static final char[] fEnt_chars = (fOldBehaviour) ? fOld_ent_chars : fNew_ent_chars;

	/**
	 * Parses an entity reference; the leading '&amp;' has already been
	 * consumed.<br>
	 * <code>
	 * EntityRef ::= '&amp;' + EntityValue + ';'<br>
	 * EntityValue ::= 'amp' | 'quot' | 'apos' | 'gt' | 'lt' | identifier
	 * </code><br>
	 * Outside of attributes a known entity is replaced by its character. In
	 * every other case (inside attributes, or unknown entity) the reference is
	 * appended unchanged as <code>&amp;name;</code>.
	 *
	 * @param sb
	 *            the buffer that the recognized entity will be appended to
	 * @param inAttribute
	 *            true when the reference is part of an attribute value
	 * @return always true (a missing ';' is reported through an exception)
	 * @throws IOException
	 *             on read failure or if the terminating ';' is missing
	 */
	protected boolean parse_EntityRef(CharBuffer sb, boolean inAttribute) throws IOException {
		String ent = parse_identifier();

		if (!accept_char(';')) {
			err(ERR_SEMICOLON_EXPECTED);
		}

		if (!inAttribute) {
			for (int i = 0; i < fEntities.length; i++) {
				// equals() instead of '==': do not depend on string interning.
				if (fEntities[i].equals(ent)) {
					sb.append(fEnt_chars[i]);
					return true;
				}
			}
		}

		sb.append('&');
		if (ent != null) {
			sb.append(ent);
		}
		sb.append(';');

		return true;
	}

	/**
	 * Consumes a character reference (<code>&amp;#...;</code>); the leading
	 * '&amp;' has already been consumed.
	 * <p>
	 * NOTE(review): decoding is not implemented - the reference is skipped and
	 * nothing is appended to the buffer. Decoding it inside attribute values
	 * could change the raw attribute string that is stored on the tag, so
	 * confirm how that string is consumed before implementing it.
	 * </p>
	 *
	 * @param sb
	 *            CharBuffer that a decoded reference would be added to
	 *            (currently left untouched)
	 * @return true if a character reference was consumed, false if the current
	 *         character is not '#'
	 * @throws IOException
	 *             on read failure, including end of input before ';'
	 */
	protected boolean parse_CharRef(CharBuffer sb) throws IOException {
		if (!accept_char('#')) {
			return false;
		}

		// TODO - Postponed: the digits are skipped, not decoded.
		while (currentChar != ';') {
			getNextChar();
		}

		if (!accept_char(';')) {
			err(fPos - 1, ERR_SEMICOLON_EXPECTED);
		}

		return true;
	}

	/**
	 * Skips the white spaces starting from the current position
	 *
	 * @throws IOException
	 *             if reading from the input fails
	 */
	protected void clearWhiteSpaces() throws IOException {
		while (Character.isWhitespace(currentChar) && getNextChar()) {
			// keep skipping
		}
	}

	/**
	 * Throws an IOException with a given message. The current line number and
	 * line position are prepended to the error message
	 *
	 * @param message
	 *            the message of the exception
	 * @throws IOException
	 *             always
	 */
	protected void err(String message) throws IOException {
		err(fPos, message);
	}

	/**
	 * Throws an IOException with the given message for the given line position.
	 * The current line number and position (pos) are prepended to the exception
	 * message
	 *
	 * @param pos
	 *            the line position that the error will be reported for
	 * @param message
	 *            the message of the exception
	 * @throws IOException
	 *             always
	 */
	protected void err(int pos, String message) throws IOException {
		throw new IOException("[Line: " + fLine + ", Pos: " + pos + "] " + message);
	}

	/**
	 * Initiates parsing of the XML file given through aInputStream or aReader
	 * in the given constructor when creating XMLReader object. Parsing ends
	 * as soon as the root element has been read completely.
	 *
	 * @throws IOException
	 *             if an error occurs during reading the XML file or if a
	 *             parsing error occurs.
	 */
	protected void parseXML() throws IOException {
		TagClass rootTag = new TagClass();

		try {
			getNextChar();
			clearWhiteSpaces();

			boolean start = false;

			while (accept_char('<')) {
				start = true;
				int pos = fPos;

				// The prolog is only recognised at the very beginning of the input.
				if (fPos == 2 && fLine == 1 && parse_xml_prolog(rootTag)) {
					setEncoding(fEncoding);
					clearWhiteSpaces();
					continue;
				}

				// No prolog: decode stream input with the default encoding from
				// here on, also when the document starts directly with its root
				// element. This is a no-op once a reader is in place.
				setEncoding(fDefaultEncoding);

				if (!parse_tag_special(rootTag)) {
					if (parse_tag_normal(rootTag)) {
						// TODO check whether the end of the file has been
						// reached; if it has not, only comments may follow
						// from here on.
						return;
					}
					err(pos, ERR_TAGNAME2_EXPECTED);
				}

				clearWhiteSpaces();
			}

			if (!start) {
				err(ERR_LT_EXPECTED);
			}
		} catch (IOException ioe) {
			if (fDebug) {
				ioe.printStackTrace(System.err);
			}

			throw ioe;
		}
	}

	/**
	 * Parses the XML document supplied by aInputStream and notifies aListener
	 * for close-tag events <br>
	 * <br>
	 * <b>Note:</b> the XMLReader does not close the passed InputStream.
	 *
	 * @param aInputStream
	 *            an InputStream to read the XML file from
	 * @param aListener
	 *            TagListener that will be notified on close-tag event
	 * @param aLevel
	 *            see {@link #parseXML(Reader, TagListener, int)}
	 * @throws IOException
	 *             if some IO error occurs when reading the XML file or if a
	 *             parser error occurs.
	 */
	public static void parseXML(InputStream aInputStream, TagListener aListener, int aLevel) throws IOException {
		XMLReader xml = new XMLReader(aInputStream, aListener);
		xml.setLevel(aLevel);
		xml.parseXML();
	}

	/**
	 * Parses the XML document supplied by aReader and notifies aListener for
	 * close-tag events <br>
	 * <br>
	 * <b>Note:</b> the XMLReader does not close the passed Reader.
	 *
	 * @param aReader
	 *            a reader that will be used to read the XML file from
	 * @param aListener
	 *            TagListener that will be notified on close-tag event
	 * @param aLevel
	 *            indicates the tag level that the listener will be invoked for.
	 *            For example if the XML is:<br>
	 *
	 *            <pre>
	 *  &lt;a&gt;
	 *    &lt;b&gt;
	 *      &lt;c /&gt;
	 *    &lt;/b&gt;
	 *  &lt;/a&gt;
	 * </pre>
	 *
	 *            <br>
	 *            and the passed aLevel is 2 then the listener will be invoked
	 *            only for tags that have level 2, i.e. in our example the
	 *            listener will be invoked only for tag &lt;b&gt;<br>
	 *            <ul>
	 *            <li>Value less than or equal to 0 indicates "invoke listener
	 *            for all tags no matter what are their levels"</li>
	 *            <li>Value greater than 0 indicates the tag level that the
	 *            listener will be invoked for</li>
	 *            </ul>
	 *            NOTE(review): earlier documentation stated that 0 disables
	 *            notification altogether; the implementation treats 0 like a
	 *            negative value.
	 * @throws IOException
	 *             if some IO error occurs when reading the XML file or if a
	 *             parser error occurs.
	 */
	public static void parseXML(Reader aReader, TagListener aListener, int aLevel) throws IOException {
		XMLReader xml = new XMLReader(aReader, aListener);
		xml.setLevel(aLevel);
		xml.parseXML();
	}

	/**
	 * Returns the XML version attribute
	 *
	 * @return the version read from the prolog, "1.0" if none was read
	 */
	public String getVersion() {
		return fVersion;
	}

	/**
	 * Returns the XML encoding attribute
	 *
	 * @return the upper-cased encoding read from the prolog, "UTF-8" if none
	 *         was read
	 */
	public String getEncoding() {
		return fEncoding;
	}

	/**
	 * Returns the value of XML standalone attribute
	 *
	 * @return the standalone value read from the prolog, "no" if none was read
	 */
	public String getStandalone() {
		return fStandalone;
	}
}