diff options
author | david_williams | 2005-05-25 18:12:25 +0000 |
---|---|---|
committer | david_williams | 2005-05-25 18:12:25 +0000 |
commit | c226beefc62fb7d41df170d1a27e8559de7f0996 (patch) | |
tree | 21c2320a4a62a7a2e44415ec2ca7a085746e5cde /bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype | |
parent | 2d43e9749bb107cf3238114c0156c2681b4916c4 (diff) | |
download | webtools.sourceediting-c226beefc62fb7d41df170d1a27e8559de7f0996.tar.gz webtools.sourceediting-c226beefc62fb7d41df170d1a27e8559de7f0996.tar.xz webtools.sourceediting-c226beefc62fb7d41df170d1a27e8559de7f0996.zip |
fixes for unit tests and content type
Diffstat (limited to 'bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype')
2 files changed, 121 insertions, 63 deletions
diff --git a/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/ContentDescriberForXML.java b/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/ContentDescriberForXML.java index fdba1fb9cc..082ef4171f 100644 --- a/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/ContentDescriberForXML.java +++ b/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/ContentDescriberForXML.java @@ -27,6 +27,13 @@ import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector; public final class ContentDescriberForXML implements ITextContentDescriber { private final static QualifiedName[] SUPPORTED_OPTIONS = {IContentDescription.CHARSET, IContentDescription.BYTE_ORDER_MARK, IContentDescriptionExtended.DETECTED_CHARSET, IContentDescriptionExtended.UNSUPPORTED_CHARSET, IContentDescriptionExtended.APPROPRIATE_DEFAULT}; + /** + * <code>restrictedMode</code> is used just for testing/experiments. + * + * If in restrictedMode, our "custom" contentType is seen as valid only in cases + * that the platform's standard one does not cover. + */ + private boolean restrictedMode = false; private IResourceCharsetDetector getDetector() { return new XMLResourceEncodingDetector(); @@ -41,7 +48,7 @@ public final class ContentDescriberForXML implements ITextContentDescriber { public int describe(InputStream contents, IContentDescription description) throws IOException { int result = IContentDescriber.INDETERMINATE; - calculateSupportedOptions(contents, description); + result = calculateSupportedOptions(result, contents, description); return result; } @@ -55,7 +62,7 @@ public final class ContentDescriberForXML implements ITextContentDescriber { public int describe(Reader contents, IContentDescription description) throws IOException { int result = IContentDescriber.INDETERMINATE; - calculateSupportedOptions(contents, description); + result = calculateSupportedOptions(result, contents, description); return result; } @@ -70,12 +77,14 @@ public final class ContentDescriberForXML implements ITextContentDescriber { return SUPPORTED_OPTIONS; } - private void calculateSupportedOptions(InputStream contents, IContentDescription description) throws IOException { + private int calculateSupportedOptions(int result, InputStream contents, IContentDescription description) throws IOException { + int returnResult = result; if (isRelevent(description)) { IResourceCharsetDetector detector = getDetector(); detector.set(contents); - handleCalculations(description, detector); + returnResult = handleCalculations(result, description, detector); } + return returnResult; } /** @@ -83,12 +92,14 @@ public final class ContentDescriberForXML implements ITextContentDescriber { * @param description * @throws IOException */ - private void calculateSupportedOptions(Reader contents, IContentDescription description) throws IOException { + private int calculateSupportedOptions(int result, Reader contents, IContentDescription description) throws IOException { + int returnResult = result; if (isRelevent(description)) { IResourceCharsetDetector detector = getDetector(); detector.set(contents); - handleCalculations(description, detector); + returnResult = handleCalculations(result, description, detector); } + return returnResult; } private void handleDetectedSpecialCase(IContentDescription description, Object detectedCharset, Object javaCharset) { @@ -118,7 +129,7 @@ public final class ContentDescriberForXML implements ITextContentDescriber { private boolean isRelevent(IContentDescription description) { boolean result = false; if (description == null) - result = false; + result = true; else if (description.isRequested(IContentDescription.BYTE_ORDER_MARK)) result = true; else if (description.isRequested(IContentDescription.CHARSET)) @@ -140,66 +151,87 @@ public final class ContentDescriberForXML implements ITextContentDescriber { * @param detector * @throws IOException */ - private void handleCalculations(IContentDescription description, IResourceCharsetDetector detector) throws IOException { - // note: if we're asked for one, we set them all. I need to be sure if - // called - // mulitiple times (one for each, say) that we don't waste time - // processing same - // content again. + private int handleCalculations(int result, IContentDescription description, IResourceCharsetDetector detector) throws IOException { + int returnResult = result; EncodingMemento encodingMemento = detector.getEncodingMemento(); - // TODO: I need to verify to see if this BOM work is always done - // by text type. - Object detectedByteOrderMark = encodingMemento.getUnicodeBOM(); - if (detectedByteOrderMark != null) { - Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK); - // not sure why would ever be different, so if is different, may - // need to "push" up into base. - if (!detectedByteOrderMark.equals(existingByteOrderMark)) - description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark); - } + if (description != null) { + // TODO: I need to verify to see if this BOM work is always done + // by text type. + Object detectedByteOrderMark = encodingMemento.getUnicodeBOM(); + if (detectedByteOrderMark != null) { + Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK); + // not sure why would ever be different, so if is different, + // may + // need to "push" up into base. + if (!detectedByteOrderMark.equals(existingByteOrderMark)) + description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark); + } - if (!encodingMemento.isValid()) { - // note: after setting here, its the mere presence of - // IContentDescriptionExtended.UNSUPPORTED_CHARSET - // in the resource's description that can be used to determine if - // invalid - // in those cases, the "detected" property contains an - // "appropriate default" to use. - description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding()); - description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault()); - } + if (!encodingMemento.isValid()) { + // note: after setting here, its the mere presence of + // IContentDescriptionExtended.UNSUPPORTED_CHARSET + // in the resource's description that can be used to determine + // if invalid in those cases, the "detected" property contains + // an "appropriate default" to use. + description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding()); + description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault()); + } + + Object detectedCharset = encodingMemento.getDetectedCharsetName(); + Object javaCharset = encodingMemento.getJavaCharsetName(); + + // we always include detected, if its different than java + handleDetectedSpecialCase(description, detectedCharset, javaCharset); - Object detectedCharset = encodingMemento.getDetectedCharsetName(); - Object javaCharset = encodingMemento.getJavaCharsetName(); - - // we always include detected, if its different than java - handleDetectedSpecialCase(description, detectedCharset, javaCharset); - - if (javaCharset != null) { - Object existingCharset = description.getProperty(IContentDescription.CHARSET); - if (javaCharset.equals(existingCharset)) { - handleDetectedSpecialCase(description, detectedCharset, javaCharset); - } else { - // we may need to add what we found, but only need to add - // if different from default.the - Object defaultCharset = getDetector().getSpecDefaultEncoding(); - if (defaultCharset != null) { - if (!defaultCharset.equals(javaCharset)) { + if (javaCharset != null) { + Object existingCharset = description.getProperty(IContentDescription.CHARSET); + if (javaCharset.equals(existingCharset)) { + handleDetectedSpecialCase(description, detectedCharset, javaCharset); + } + else { + // we may need to add what we found, but only need to add + // if different from the default + Object defaultCharset = getDetector().getSpecDefaultEncoding(); + if (defaultCharset != null) { + if (!defaultCharset.equals(javaCharset)) { + description.setProperty(IContentDescription.CHARSET, javaCharset); + } + } + else { + // assuming if there is no spec default, we always + // need to add. + // TODO: this is probably a dead branch in current + // code, should re-examine for removal. description.setProperty(IContentDescription.CHARSET, javaCharset); } - } else { - // assuming if there is no spec default, we always need to - // add, I'm assuming - description.setProperty(IContentDescription.CHARSET, javaCharset); } } } - // avoid adding anything if not absolutly needed, since always - // "cached" per session - // description.setProperty(IContentDescriptionExtended.ENCODING_MEMENTO, - // encodingMemento); + returnResult = determineValidity(detector, returnResult); + return returnResult; + } + + private int determineValidity(IResourceCharsetDetector detector, int returnResult) { + // we always expect XMLResourceEncodingDetector, but just to make safe + // cast. + if (detector instanceof XMLResourceEncodingDetector) { + XMLResourceEncodingDetector xmlResourceDetector = (XMLResourceEncodingDetector) detector; + if (xmlResourceDetector.isDeclDetected()) { + if (restrictedMode ) { + // if there is no initial whitespace, then platform's + // default one will do. + if (xmlResourceDetector.hasInitialWhiteSpace()) { + returnResult = IContentDescriber.VALID; + } + } + else { + returnResult = IContentDescriber.VALID; + } + } + } + return returnResult; } } diff --git a/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/XMLResourceEncodingDetector.java b/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/XMLResourceEncodingDetector.java index c010026b29..fdefa02dc5 100644 --- a/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/XMLResourceEncodingDetector.java +++ b/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/XMLResourceEncodingDetector.java @@ -20,6 +20,8 @@ import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector; public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetector implements IResourceCharsetDetector { private XMLHeadTokenizer fTokenizer; + private boolean fDeclDetected = false; + private boolean fInitialWhiteSpace = false; private boolean canHandleAsUnicodeStream(String tokenType) { boolean canHandleAsUnicodeStream = false; @@ -34,7 +36,8 @@ public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetecto canHandleAsUnicodeStream = true; String enc = "UTF-16BE"; //$NON-NLS-1$ createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES); - } else if (tokenType == EncodingParserConstants.UTF16LE) { + } + else if (tokenType == EncodingParserConstants.UTF16LE) { canHandleAsUnicodeStream = true; String enc = "UTF-16"; //$NON-NLS-1$ createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES); @@ -54,9 +57,9 @@ public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetecto private XMLHeadTokenizer getTokenizer() { // TODO: need to work on 'reset' in tokenizer, so new instance isn't // always needed - //if (fTokenizer == null) { + // if (fTokenizer == null) { fTokenizer = new XMLHeadTokenizer(); - //} + // } return fTokenizer; } @@ -75,9 +78,24 @@ public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetecto do { token = tokenizer.getNextToken(); tokenType = token.getType(); + + // handle xml content type detection + if (tokenType == XMLHeadTokenizerConstants.XMLDeclStart) { + fDeclDetected = true; + String declText = token.getText(); + if (declText.startsWith("<?")) { + fInitialWhiteSpace = false; + } + else { + fInitialWhiteSpace = true; + } + } + + // handle encoding detection if (canHandleAsUnicodeStream(tokenType)) { // side effect of canHandle is to create appropriate memento - } else { + } + else { if (tokenType == XMLHeadTokenizerConstants.XMLDelEncoding) { if (tokenizer.hasMoreTokens()) { token = tokenizer.getNextToken(); @@ -87,13 +105,21 @@ public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetecto if (enc != null && enc.length() > 0) { createEncodingMemento(enc, EncodingMemento.FOUND_ENCODING_IN_CONTENT); } - } } } } - } while (tokenizer.hasMoreTokens()); + } + while (tokenizer.hasMoreTokens()); + + } + + public boolean isDeclDetected() { + return fDeclDetected; + } + public boolean hasInitialWhiteSpace() { + return fInitialWhiteSpace; } } |