diff options
Diffstat (limited to 'bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/parser/XMLSourceParser.java')
-rw-r--r-- | bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/parser/XMLSourceParser.java | 621 |
1 files changed, 0 insertions, 621 deletions
/*******************************************************************************
 * Copyright (c) 2001, 2004 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *     Jens Lukowski/Innoopract - initial renaming/restructuring
 *
 *******************************************************************************/
package org.eclipse.wst.xml.core.internal.parser;

import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import org.eclipse.jface.text.BadLocationException;
import org.eclipse.jface.text.IDocument;
import org.eclipse.wst.sse.core.internal.document.DocumentReader;
import org.eclipse.wst.sse.core.internal.ltk.parser.BlockMarker;
import org.eclipse.wst.sse.core.internal.ltk.parser.BlockTagParser;
import org.eclipse.wst.sse.core.internal.ltk.parser.BlockTokenizer;
import org.eclipse.wst.sse.core.internal.ltk.parser.RegionParser;
import org.eclipse.wst.sse.core.internal.ltk.parser.StructuredDocumentRegionHandler;
import org.eclipse.wst.sse.core.internal.ltk.parser.StructuredDocumentRegionParser;
import org.eclipse.wst.sse.core.internal.ltk.parser.StructuredDocumentRegionParserExtension;
import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocumentRegion;
import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegion;
import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegionContainer;
import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegionList;
import org.eclipse.wst.sse.core.internal.text.CharSequenceReader;
import org.eclipse.wst.sse.core.internal.text.IRegionComparible;
import org.eclipse.wst.sse.core.internal.util.Debug;
import org.eclipse.wst.xml.core.internal.Logger;
import org.eclipse.wst.xml.core.internal.regions.DOMRegionContext;

/**
 * Pulls text regions from a {@link BlockTokenizer} (an {@link XMLTokenizer}
 * unless another one is installed via {@link #setTokenizer(BlockTokenizer)})
 * and groups them into a doubly linked list of
 * {@link IStructuredDocumentRegion}s.
 * <p>
 * Region type constants from {@link DOMRegionContext} are compared by
 * identity ({@code ==}) throughout this class, exactly as the original code
 * did; this presumably relies on the tokenizer handing back those very
 * constant instances.
 * </p>
 */
public class XMLSourceParser implements RegionParser, BlockTagParser, StructuredDocumentRegionParser, IRegionComparible, StructuredDocumentRegionParserExtension {
    // made public to aid access from inner classes in hierarchy.
    // TODO: in future, figure out how to solve without exposing data.
    public CharSequence fCharSequenceSource = null;
    private IDocument fDocumentInput;
    protected int fOffset = 0;
    // DMW: 2/12/03. Per-parse state (current node, node list, region list,
    // raw input) was removed because it added a lot of overhead for the
    // large number of regions that get created.
    protected String fStringInput = null;
    protected List fStructuredDocumentRegionHandlers;

    protected BlockTokenizer fTokenizer = null;
    protected long startTime;
    protected long stopTime;

    /**
     * Creates a parser with an empty handler list. The tokenizer is created
     * lazily by {@link #getTokenizer()}.
     */
    public XMLSourceParser() {
        super();
        fStructuredDocumentRegionHandlers = new ArrayList();
    }

    /**
     * Counts the nodes reachable from {@code nodes} by following
     * {@code getNext()}. Used only for debug statements.
     *
     * @param nodes
     *            first node of the chain, may be null
     * @return number of nodes in the chain (0 for null)
     */
    protected int _countNodes(IStructuredDocumentRegion nodes) {
        int result = 0;
        for (IStructuredDocumentRegion node = nodes; node != null; node = node.getNext()) {
            result++;
        }
        return result;
    }

    /** Registers a block marker with the tokenizer. */
    public void addBlockMarker(BlockMarker marker) {
        getTokenizer().addBlockMarker(marker);
    }

    /**
     * Adds a handler that is notified for every completed structured
     * document region; the handler list is created on demand.
     */
    public void addStructuredDocumentRegionHandler(StructuredDocumentRegionHandler handler) {
        if (fStructuredDocumentRegionHandlers == null) {
            fStructuredDocumentRegionHandlers = new ArrayList();
        }
        fStructuredDocumentRegionHandlers.add(handler);
    }

    /** Asks the tokenizer to begin a block-tag scan for the given tag name. */
    public void beginBlockScan(String newTagName) {
        getTokenizer().beginBlockTagScan(newTagName);
    }

    /**
     * Creates the structured document region that will hold regions of the
     * given type: an XML block region for
     * {@link DOMRegionContext#BLOCK_TEXT}, a plain XML region otherwise.
     *
     * @param type
     *            region type of the first region of the new node
     * @return the newly created IStructuredDocumentRegion
     */
    protected IStructuredDocumentRegion createStructuredDocumentRegion(String type) {
        if (type == DOMRegionContext.BLOCK_TEXT) {
            return XMLStructuredRegionFactory.createRegion(XMLStructuredRegionFactory.XML_BLOCK);
        }
        return XMLStructuredRegionFactory.createRegion(XMLStructuredRegionFactory.XML);
    }

    /**
     * Notifies every registered handler that {@code fCurrentNode} has been
     * parsed. Does nothing for a null node or a null handler list.
     * <p>
     * Exceptions from foreign handler code must never interfere with
     * completion of parsing (backwards typing can result). Each handler is
     * therefore invoked in its own try block, so that one failing handler
     * cannot prevent the remaining handlers from being notified, and the
     * failure is logged together with its stack trace.
     * </p>
     *
     * @param fCurrentNode
     *            the node that has just been completed, may be null
     */
    protected void fireNodeParsed(IStructuredDocumentRegion fCurrentNode) {
        if (fCurrentNode == null || fStructuredDocumentRegionHandlers == null) {
            return;
        }
        // size() is re-read on every iteration, as in the original loop
        for (int i = 0; i < fStructuredDocumentRegionHandlers.size(); i++) {
            try {
                ((StructuredDocumentRegionHandler) fStructuredDocumentRegionHandlers.get(i)).nodeParsed(fCurrentNode);
            }
            catch (Exception e) {
                Logger.logException(getClass().getName() + ": exception in StructuredDocumentRegionHandler (" + e.getMessage() + ")", e); //$NON-NLS-2$//$NON-NLS-1$
            }
        }
    }

    /**
     * Looks up the tokenizer's block marker for a tag name, honouring each
     * marker's own case sensitivity.
     *
     * @param tagName
     *            tag name to look for
     * @return the first matching marker, or null if there is none
     */
    public BlockMarker getBlockMarker(String tagName) {
        List markers = getTokenizer().getBlockMarkers();
        for (int i = 0; i < markers.size(); i++) {
            BlockMarker marker = (BlockMarker) markers.get(i);
            String markerName = marker.getTagName();
            boolean matches = marker.isCaseSensitive() ? markerName.equals(tagName) : markerName.equalsIgnoreCase(tagName);
            if (matches) {
                return marker;
            }
        }
        return null;
    }

    /** @return the tokenizer's list of block markers */
    public List getBlockMarkers() {
        return getTokenizer().getBlockMarkers();
    }

    /**
     * Parses the current input and returns the head of the resulting node
     * chain. When {@code Debug.perfTest} is set, timing and node counts are
     * printed to standard out.
     *
     * @return first IStructuredDocumentRegion, or null if nothing was parsed
     */
    public IStructuredDocumentRegion getDocumentRegions() {
        if (Debug.perfTest) {
            startTime = System.currentTimeMillis();
        }
        IStructuredDocumentRegion headnode = parseNodes();
        if (Debug.perfTest) {
            stopTime = System.currentTimeMillis();
            System.out.println(" -- creating nodes of IStructuredDocument -- "); //$NON-NLS-1$
            System.out.println(" Time parse and init all regions: " + (stopTime - startTime) + " (msecs)"); //$NON-NLS-2$//$NON-NLS-1$
            System.out.println(" and " + _countNodes(headnode) + " Nodes"); //$NON-NLS-2$//$NON-NLS-1$
        }
        return headnode;
    }

    /**
     * Fetches the next token from the tokenizer.
     *
     * @return the next region, or null when the tokenizer returned null or
     *         threw an Exception (which is logged); a StackOverflowError is
     *         logged and rethrown
     */
    protected ITextRegion getNextRegion() {
        try {
            return getTokenizer().getNextToken();
        }
        catch (StackOverflowError e) {
            Logger.logException(getClass().getName() + ": input could not be parsed correctly at position " + getTokenizer().getOffset(), e); //$NON-NLS-1$
            throw e;
        }
        catch (Exception e) {
            Logger.logException(getClass().getName() + ": input could not be parsed correctly at position " + getTokenizer().getOffset() + " (" + e.getLocalizedMessage() + ")", e); //$NON-NLS-3$//$NON-NLS-2$//$NON-NLS-1$
        }
        return null;
    }

    /**
     * Returns the full list of known regions. Typically
     * {@link #getDocumentRegions()} should be used instead of this method.
     * <p>
     * If the tokenizer has not reached EOF the input is parsed first. For
     * memory recovery the parser state is reset afterwards, on the
     * assumption that a caller asking for all regions is done with the
     * parser's large internal objects.
     * </p>
     */
    public List getRegions() {
        IStructuredDocumentRegion headNode = null;
        if (!getTokenizer().isEOF()) {
            headNode = getDocumentRegions();
        }
        List localRegionsList = getRegions(headNode);
        primReset();
        return localRegionsList;
    }

    /**
     * Flattens a node chain into a list of its text regions, in order.
     *
     * @param headNode
     *            first node of the chain, may be null
     * @return List of all ITextRegions of all nodes (empty for null)
     */
    protected List getRegions(IStructuredDocumentRegion headNode) {
        List allRegions = new ArrayList();
        for (IStructuredDocumentRegion node = headNode; node != null; node = node.getNext()) {
            ITextRegionList nodeRegions = node.getRegions();
            for (int i = 0; i < nodeRegions.size(); i++) {
                allRegions.add(nodeRegions.get(i));
            }
        }
        return allRegions;
    }

    /**
     * @return the (lazily created) list of registered handlers
     */
    public List getStructuredDocumentRegionHandlers() {
        if (fStructuredDocumentRegionHandlers == null) {
            fStructuredDocumentRegionHandlers = new ArrayList(0);
        }
        return fStructuredDocumentRegionHandlers;
    }

    /**
     * Returns text from the current input. Text is only available until
     * parsing finishes, because {@link #primReset()} drops every input
     * reference at the end of {@link #parseNodes()}.
     * <ul>
     * <li>CharSequence input: the range is shifted by the reset position
     * ({@code fOffset}); an invalid range propagates whatever
     * {@code subSequence} throws.</li>
     * <li>IDocument input: an invalid range yields the empty string.</li>
     * <li>String input: a missing input or invalid range yields the empty
     * string.</li>
     * </ul>
     *
     * @param offset
     *            start of the requested text
     * @param length
     *            number of characters requested
     * @return the requested text, possibly empty
     */
    public String getText(int offset, int length) {
        if (fCharSequenceSource != null) {
            int start = fOffset + offset;
            return fCharSequenceSource.subSequence(start, start + length).toString();
        }
        if (fDocumentInput != null) {
            try {
                return fDocumentInput.get(offset, length);
            }
            catch (BadLocationException e) {
                return ""; //$NON-NLS-1$
            }
        }
        boolean unavailable = fStringInput == null || fStringInput.length() == 0 || offset + length > fStringInput.length() || offset < 0;
        if (unavailable) {
            return ""; //$NON-NLS-1$
        }
        // offset is entirely valid during parsing as the parse
        // numbers haven't been adjusted.
        return fStringInput.substring(offset, offset + length);
    }

    /** @return the tokenizer, creating an {@link XMLTokenizer} on first use */
    protected BlockTokenizer getTokenizer() {
        if (fTokenizer == null) {
            fTokenizer = new XMLTokenizer();
        }
        return fTokenizer;
    }

    /**
     * @return a fresh parser whose tokenizer is a new instance derived from
     *         this parser's tokenizer
     */
    public RegionParser newInstance() {
        XMLSourceParser newInstance = new XMLSourceParser();
        newInstance.setTokenizer(getTokenizer().newInstance());
        return newInstance;
    }

    /**
     * Consumes all tokens and assembles them into structured document
     * regions.
     * <p>
     * Regions are initially reported with offsets relative to the scanned
     * input; they are adjusted here to be relative to the start of the node
     * that owns them. The statement order inside each branch is deliberately
     * kept as in the original implementation. Handlers are notified each
     * time a node is superseded and once more for the final node; all input
     * state is reset before returning.
     * </p>
     * <p>
     * A token that would extend or close a node while no node exists yet
     * (for example a stray close token as very first token) gets a node
     * created on demand instead of raising a NullPointerException.
     * </p>
     *
     * @return the first node, or null if the tokenizer produced no regions
     */
    protected IStructuredDocumentRegion parseNodes() {
        IStructuredDocumentRegion headNode = null;
        IStructuredDocumentRegion lastNode = null;
        IStructuredDocumentRegion currentNode = null;
        ITextRegion region = null;

        while ((region = getNextRegion()) != null) {
            String type = region.getType();

            // these types (might) demand a IStructuredDocumentRegion for
            // each of them
            if (type == DOMRegionContext.BLOCK_TEXT) {
                if (currentNode != null && currentNode.getLastRegion().getType() == DOMRegionContext.BLOCK_TEXT) {
                    // multiple block texts indicate embedded containers; no
                    // new IStructuredDocumentRegion
                    appendRegion(currentNode, region);
                }
                else {
                    // not continuing a IStructuredDocumentRegion
                    if (currentNode != null) {
                        lastNode = currentNode;
                    }
                    currentNode = startNewNode(type, region, lastNode);
                    currentNode.setLength(region.getStart() + region.getLength() - currentNode.getStart());
                    currentNode.setEnded(true);
                    region.adjustStart(-currentNode.getStart());
                    currentNode.addRegion(region);
                }
            }
            // an ended node, or any opening context, starts a new node
            else if ((currentNode != null && currentNode.isEnded()) || opensNode(type)) {
                if (currentNode != null) {
                    lastNode = currentNode;
                }
                currentNode = startNewNode(type, region, lastNode);
                appendRegion(currentNode, region);
            }
            // contexts that neither open nor close a node; just add to it
            else if (extendsNode(type)) {
                currentNode = nodeOrNew(currentNode, type, region);
                appendRegion(currentNode, region);
            }
            // contexts that close off a node cleanly
            else if (closesNode(type)) {
                currentNode = nodeOrNew(currentNode, type, region);
                currentNode.setEnded(true);
                currentNode.setLength(region.getStart() + region.getLength() - currentNode.getStart());
                currentNode.addRegion(region);
                region.adjustStart(-currentNode.getStart());
            }
            // this is extremely rare, but valid; without an existing node
            // the default handling below is good enough
            else if (type == DOMRegionContext.WHITE_SPACE && currentNode != null) {
                ITextRegion lastRegion = currentNode.getLastRegion();
                // pack the embedded container with this region
                if (lastRegion instanceof ITextRegionContainer) {
                    ITextRegionContainer container = (ITextRegionContainer) lastRegion;
                    container.getRegions().add(region);
                    // containers must have parent set; doing so for EACH
                    // subregion is redundant but there is no better place
                    container.setParent(currentNode);
                    region.adjustStart(container.getLength() - region.getStart());
                }
                currentNode.getLastRegion().adjustLength(region.getLength());
                currentNode.adjustLength(region.getLength());
            }
            else if (type == DOMRegionContext.UNDEFINED && currentNode != null) {
                // skipped on a very-first region situation as the default
                // behavior is good enough
                ITextRegion lastRegion = currentNode.getLastRegion();
                if (lastRegion != null && lastRegion.getType() == DOMRegionContext.UNDEFINED) {
                    // combine with previous if also undefined
                    currentNode.getLastRegion().adjustLength(region.getLength());
                    currentNode.adjustLength(region.getLength());
                }
                else {
                    appendRegion(currentNode, region);
                }
            }
            else {
                // if an unknown type is the first region in the document,
                // ensure that a node exists
                currentNode = nodeOrNew(currentNode, type, region);
                appendRegion(currentNode, region);
                if (Debug.debugTokenizer) {
                    // note: the start printed here is already node-relative
                    System.out.println(getClass().getName() + " found region of not specifically handled type " + region.getType() + " @ " + region.getStart() + "[" + region.getLength() + "]"); //$NON-NLS-4$//$NON-NLS-3$//$NON-NLS-2$//$NON-NLS-1$
                }
            }

            // content and reference regions get a node of their own (opened
            // above), so close that node right away
            if ((type == DOMRegionContext.XML_CONTENT) || (type == DOMRegionContext.XML_CHAR_REFERENCE) || (type == DOMRegionContext.XML_ENTITY_REFERENCE)) {
                currentNode.setEnded(true);
            }
            if (headNode == null && currentNode != null) {
                headNode = currentNode;
            }
        }
        if (currentNode != null) {
            fireNodeParsed(currentNode);
            currentNode.setPrevious(lastNode);
        }
        primReset();
        return headNode;
    }

    /** Tells whether a region of this type always opens a new node. */
    private static boolean opensNode(String type) {
        return (type == DOMRegionContext.XML_CONTENT) || (type == DOMRegionContext.XML_CHAR_REFERENCE) || (type == DOMRegionContext.XML_ENTITY_REFERENCE) || (type == DOMRegionContext.XML_PI_OPEN) || (type == DOMRegionContext.XML_TAG_OPEN) || (type == DOMRegionContext.XML_END_TAG_OPEN) || (type == DOMRegionContext.XML_COMMENT_OPEN) || (type == DOMRegionContext.XML_CDATA_OPEN) || (type == DOMRegionContext.XML_DECLARATION_OPEN);
    }

    /** Tells whether a region of this type is simply added to the open node. */
    private static boolean extendsNode(String type) {
        return (type == DOMRegionContext.XML_TAG_NAME) || (type == DOMRegionContext.XML_TAG_ATTRIBUTE_NAME) || (type == DOMRegionContext.XML_TAG_ATTRIBUTE_EQUALS) || (type == DOMRegionContext.XML_TAG_ATTRIBUTE_VALUE) || (type == DOMRegionContext.XML_COMMENT_TEXT) || (type == DOMRegionContext.XML_PI_CONTENT) || (type == DOMRegionContext.XML_DOCTYPE_INTERNAL_SUBSET);
    }

    /** Tells whether a region of this type cleanly closes the open node. */
    private static boolean closesNode(String type) {
        return (type == DOMRegionContext.XML_PI_CLOSE) || (type == DOMRegionContext.XML_TAG_CLOSE) || (type == DOMRegionContext.XML_EMPTY_TAG_CLOSE) || (type == DOMRegionContext.XML_COMMENT_CLOSE) || (type == DOMRegionContext.XML_DECLARATION_CLOSE) || (type == DOMRegionContext.XML_CDATA_CLOSE);
    }

    /**
     * Adds the region to the node, stretches the node to the region's end
     * and makes the region's start relative to the node (in that order).
     */
    private static void appendRegion(IStructuredDocumentRegion node, ITextRegion region) {
        node.addRegion(region);
        node.setLength(region.getStart() + region.getLength() - node.getStart());
        region.adjustStart(-node.getStart());
    }

    /**
     * Returns {@code node}, or when it is null a new node of the given type
     * positioned at the region's start.
     */
    private IStructuredDocumentRegion nodeOrNew(IStructuredDocumentRegion node, String type, ITextRegion region) {
        if (node != null) {
            return node;
        }
        IStructuredDocumentRegion created = createStructuredDocumentRegion(type);
        created.setStart(region.getStart());
        return created;
    }

    /**
     * Terminates {@code previous} if it is still open, reports it to the
     * handlers, then creates a node for {@code region}, links it behind
     * {@code previous} and positions it at the region's start.
     */
    private IStructuredDocumentRegion startNewNode(String type, ITextRegion region, IStructuredDocumentRegion previous) {
        // ensure that any existing node is at least terminated
        if (previous != null && !previous.isEnded()) {
            previous.setLength(region.getStart() - previous.getStart());
        }
        fireNodeParsed(previous);
        IStructuredDocumentRegion node = createStructuredDocumentRegion(type);
        if (previous != null) {
            previous.setNext(node);
        }
        node.setPrevious(previous);
        node.setStart(region.getStart());
        return node;
    }

    /**
     * Drops every reference to the current input and resets the tokenizer
     * with an empty buffer so it doesn't hold on to large arrays.
     */
    protected void primReset() {
        fStringInput = null;
        fCharSequenceSource = null;
        fDocumentInput = null;
        fOffset = 0;
        getTokenizer().reset(new char[0]);
    }

    /*
     * (non-Javadoc)
     *
     * @see org.eclipse.wst.sse.core.internal.text.IRegionComparible#regionMatches(int,
     *      int, java.lang.String)
     */
    public boolean regionMatches(int offset, int length, String stringToCompare) {
        // by definition
        if (stringToCompare == null) {
            return false;
        }
        if (fCharSequenceSource instanceof IRegionComparible) {
            return ((IRegionComparible) fCharSequenceSource).regionMatches(offset, length, stringToCompare);
        }
        // old fashioned way
        return stringToCompare.equals(textForComparison(offset, length));
    }

    /**
     * Case-insensitive variant of {@link #regionMatches(int, int, String)}.
     */
    public boolean regionMatchesIgnoreCase(int offset, int length, String stringToCompare) {
        // by definition
        if (stringToCompare == null) {
            return false;
        }
        if (fCharSequenceSource instanceof IRegionComparible) {
            return ((IRegionComparible) fCharSequenceSource).regionMatchesIgnoreCase(offset, length, stringToCompare);
        }
        // old fashioned way
        return stringToCompare.equalsIgnoreCase(textForComparison(offset, length));
    }

    /**
     * Extracts the text to compare from the CharSequence input or, failing
     * that, the String input; returns null when neither is set.
     * NOTE(review): unlike getText, this neither applies fOffset nor
     * consults an IDocument input - kept as in the original; confirm that
     * this is intended.
     */
    private String textForComparison(int offset, int length) {
        if (fCharSequenceSource != null) {
            return fCharSequenceSource.subSequence(offset, offset + length).toString();
        }
        if (fStringInput != null) {
            return fStringInput.substring(offset, offset + length);
        }
        return null;
    }

    /** Removes the given block marker from the tokenizer. */
    public void removeBlockMarker(BlockMarker marker) {
        getTokenizer().removeBlockMarker(marker);
    }

    /** Removes the block marker for the given tag name from the tokenizer. */
    public void removeBlockMarker(String tagName) {
        getTokenizer().removeBlockMarker(tagName);
    }

    /**
     * Removes the first registration of the handler, if there is one.
     */
    public void removeStructuredDocumentRegionHandler(StructuredDocumentRegionHandler handler) {
        if (fStructuredDocumentRegionHandlers != null) {
            // List.remove(Object) is already a no-op for an absent element
            fStructuredDocumentRegionHandlers.remove(handler);
        }
    }

    /**
     * Resets the input to the given stream.
     */
    public void reset(java.io.FileInputStream instream) {
        primReset();
        getTokenizer().reset(instream);
    }

    /**
     * Resets the input to the given reader, starting at position 0.
     */
    public void reset(java.io.Reader reader) {
        reset(reader, 0);
    }

    /**
     * Resets the input to the given reader and position. For a
     * {@link DocumentReader} or {@link CharSequenceReader} the underlying
     * document or character sequence is remembered so that text can be
     * retrieved during parsing.
     */
    public void reset(java.io.Reader reader, int position) {
        primReset();
        fOffset = position;
        getTokenizer().reset(reader, position);
        if (reader instanceof DocumentReader) {
            IDocument doc = ((DocumentReader) reader).getDocument();
            if (doc instanceof CharSequence) {
                fCharSequenceSource = (CharSequence) doc;
            }
            else {
                // old fashioned IDocument
                fDocumentInput = ((DocumentReader) reader).getDocument();
            }
        }
        else if (reader instanceof CharSequenceReader) {
            fCharSequenceSource = ((CharSequenceReader) reader).getOriginalSource();
        }
    }

    /**
     * Resets the input. Use this version to allow text to be retrieved
     * <em>during</em> parsing, such as by the
     * StructuredDocumentRegionHandler.
     */
    public void reset(String sourceString) {
        reset(new StringReader(sourceString));
        // must be assigned after reset(Reader), which clears fStringInput
        fStringInput = sourceString;
    }

    /**
     * Resets the input and start position. Use this version to allow text
     * to be retrieved <em>during</em> parsing, such as by the
     * StructuredDocumentRegionHandler.
     */
    public void reset(String sourceString, int position) {
        reset(new StringReader(sourceString), position);
        // must be assigned after reset(Reader, int), which clears fStringInput
        fStringInput = sourceString;
    }

    /** Asks every registered handler to reset its nodes. */
    public void resetHandlers() {
        if (fStructuredDocumentRegionHandlers == null) {
            return;
        }
        int size = fStructuredDocumentRegionHandlers.size();
        for (int i = 0; i < size; i++) {
            ((StructuredDocumentRegionHandler) fStructuredDocumentRegionHandlers.get(i)).resetNodes();
        }
    }

    /**
     * Replaces the handler list.
     *
     * @param newStructuredDocumentRegionHandlers
     *            the list to use from now on (stored as is, not copied)
     */
    public void setStructuredDocumentRegionHandlers(List newStructuredDocumentRegionHandlers) {
        fStructuredDocumentRegionHandlers = newStructuredDocumentRegionHandlers;
    }

    /**
     * Installs the tokenizer. Protected (not private) so that subclasses can
     * use it when implementing {@code newInstance}.
     */
    protected void setTokenizer(BlockTokenizer newTokenizer) {
        fTokenizer = newTokenizer;
    }
}