Skip to main content

This CGIT instance is deprecated, and repositories have been moved to Gitlab or Github. See the repository descriptions for specific locations.

summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author david_williams 2005-05-25 18:12:25 +0000
committer david_williams 2005-05-25 18:12:25 +0000
commit c226beefc62fb7d41df170d1a27e8559de7f0996 (patch)
tree 21c2320a4a62a7a2e44415ec2ca7a085746e5cde /bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype
parent 2d43e9749bb107cf3238114c0156c2681b4916c4 (diff)
download webtools.sourceediting-c226beefc62fb7d41df170d1a27e8559de7f0996.tar.gz
webtools.sourceediting-c226beefc62fb7d41df170d1a27e8559de7f0996.tar.xz
webtools.sourceediting-c226beefc62fb7d41df170d1a27e8559de7f0996.zip
fixes for unit tests and content type
Diffstat (limited to 'bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype')
-rw-r--r-- bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/ContentDescriberForXML.java 146
-rw-r--r-- bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/XMLResourceEncodingDetector.java 38
2 files changed, 121 insertions, 63 deletions
diff --git a/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/ContentDescriberForXML.java b/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/ContentDescriberForXML.java
index fdba1fb9cc..082ef4171f 100644
--- a/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/ContentDescriberForXML.java
+++ b/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/ContentDescriberForXML.java
@@ -27,6 +27,13 @@ import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;
public final class ContentDescriberForXML implements ITextContentDescriber {
private final static QualifiedName[] SUPPORTED_OPTIONS = {IContentDescription.CHARSET, IContentDescription.BYTE_ORDER_MARK, IContentDescriptionExtended.DETECTED_CHARSET, IContentDescriptionExtended.UNSUPPORTED_CHARSET, IContentDescriptionExtended.APPROPRIATE_DEFAULT};
+ /**
+ * <code>restrictedMode</code> is used just for testing/experiments.
+ *
+ * If in restrictedMode, our "custom" contentType is seen as valid only in cases
+ * that the platform's standard one does not cover.
+ */
+ private boolean restrictedMode = false;
private IResourceCharsetDetector getDetector() {
return new XMLResourceEncodingDetector();
@@ -41,7 +48,7 @@ public final class ContentDescriberForXML implements ITextContentDescriber {
public int describe(InputStream contents, IContentDescription description) throws IOException {
int result = IContentDescriber.INDETERMINATE;
- calculateSupportedOptions(contents, description);
+ result = calculateSupportedOptions(result, contents, description);
return result;
}
@@ -55,7 +62,7 @@ public final class ContentDescriberForXML implements ITextContentDescriber {
public int describe(Reader contents, IContentDescription description) throws IOException {
int result = IContentDescriber.INDETERMINATE;
- calculateSupportedOptions(contents, description);
+ result = calculateSupportedOptions(result, contents, description);
return result;
}
@@ -70,12 +77,14 @@ public final class ContentDescriberForXML implements ITextContentDescriber {
return SUPPORTED_OPTIONS;
}
- private void calculateSupportedOptions(InputStream contents, IContentDescription description) throws IOException {
+ private int calculateSupportedOptions(int result, InputStream contents, IContentDescription description) throws IOException {
+ int returnResult = result;
if (isRelevent(description)) {
IResourceCharsetDetector detector = getDetector();
detector.set(contents);
- handleCalculations(description, detector);
+ returnResult = handleCalculations(result, description, detector);
}
+ return returnResult;
}
/**
@@ -83,12 +92,14 @@ public final class ContentDescriberForXML implements ITextContentDescriber {
* @param description
* @throws IOException
*/
- private void calculateSupportedOptions(Reader contents, IContentDescription description) throws IOException {
+ private int calculateSupportedOptions(int result, Reader contents, IContentDescription description) throws IOException {
+ int returnResult = result;
if (isRelevent(description)) {
IResourceCharsetDetector detector = getDetector();
detector.set(contents);
- handleCalculations(description, detector);
+ returnResult = handleCalculations(result, description, detector);
}
+ return returnResult;
}
private void handleDetectedSpecialCase(IContentDescription description, Object detectedCharset, Object javaCharset) {
@@ -118,7 +129,7 @@ public final class ContentDescriberForXML implements ITextContentDescriber {
private boolean isRelevent(IContentDescription description) {
boolean result = false;
if (description == null)
- result = false;
+ result = true;
else if (description.isRequested(IContentDescription.BYTE_ORDER_MARK))
result = true;
else if (description.isRequested(IContentDescription.CHARSET))
@@ -140,66 +151,87 @@ public final class ContentDescriberForXML implements ITextContentDescriber {
* @param detector
* @throws IOException
*/
- private void handleCalculations(IContentDescription description, IResourceCharsetDetector detector) throws IOException {
- // note: if we're asked for one, we set them all. I need to be sure if
- // called
- // mulitiple times (one for each, say) that we don't waste time
- // processing same
- // content again.
+ private int handleCalculations(int result, IContentDescription description, IResourceCharsetDetector detector) throws IOException {
+ int returnResult = result;
EncodingMemento encodingMemento = detector.getEncodingMemento();
- // TODO: I need to verify to see if this BOM work is always done
- // by text type.
- Object detectedByteOrderMark = encodingMemento.getUnicodeBOM();
- if (detectedByteOrderMark != null) {
- Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
- // not sure why would ever be different, so if is different, may
- // need to "push" up into base.
- if (!detectedByteOrderMark.equals(existingByteOrderMark))
- description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark);
- }
+ if (description != null) {
+ // TODO: I need to verify to see if this BOM work is always done
+ // by text type.
+ Object detectedByteOrderMark = encodingMemento.getUnicodeBOM();
+ if (detectedByteOrderMark != null) {
+ Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
+ // not sure why would ever be different, so if is different,
+ // may
+ // need to "push" up into base.
+ if (!detectedByteOrderMark.equals(existingByteOrderMark))
+ description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark);
+ }
- if (!encodingMemento.isValid()) {
- // note: after setting here, its the mere presence of
- // IContentDescriptionExtended.UNSUPPORTED_CHARSET
- // in the resource's description that can be used to determine if
- // invalid
- // in those cases, the "detected" property contains an
- // "appropriate default" to use.
- description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding());
- description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault());
- }
+ if (!encodingMemento.isValid()) {
+ // note: after setting here, its the mere presence of
+ // IContentDescriptionExtended.UNSUPPORTED_CHARSET
+ // in the resource's description that can be used to determine
+ // if invalid in those cases, the "detected" property contains
+ // an "appropriate default" to use.
+ description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding());
+ description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault());
+ }
+
+ Object detectedCharset = encodingMemento.getDetectedCharsetName();
+ Object javaCharset = encodingMemento.getJavaCharsetName();
+
+ // we always include detected, if its different than java
+ handleDetectedSpecialCase(description, detectedCharset, javaCharset);
- Object detectedCharset = encodingMemento.getDetectedCharsetName();
- Object javaCharset = encodingMemento.getJavaCharsetName();
-
- // we always include detected, if its different than java
- handleDetectedSpecialCase(description, detectedCharset, javaCharset);
-
- if (javaCharset != null) {
- Object existingCharset = description.getProperty(IContentDescription.CHARSET);
- if (javaCharset.equals(existingCharset)) {
- handleDetectedSpecialCase(description, detectedCharset, javaCharset);
- } else {
- // we may need to add what we found, but only need to add
- // if different from default.the
- Object defaultCharset = getDetector().getSpecDefaultEncoding();
- if (defaultCharset != null) {
- if (!defaultCharset.equals(javaCharset)) {
+ if (javaCharset != null) {
+ Object existingCharset = description.getProperty(IContentDescription.CHARSET);
+ if (javaCharset.equals(existingCharset)) {
+ handleDetectedSpecialCase(description, detectedCharset, javaCharset);
+ }
+ else {
+ // we may need to add what we found, but only need to add
+ // if different from the default
+ Object defaultCharset = getDetector().getSpecDefaultEncoding();
+ if (defaultCharset != null) {
+ if (!defaultCharset.equals(javaCharset)) {
+ description.setProperty(IContentDescription.CHARSET, javaCharset);
+ }
+ }
+ else {
+ // assuming if there is no spec default, we always
+ // need to add.
+ // TODO: this is probably a dead branch in current
+ // code, should re-examine for removal.
description.setProperty(IContentDescription.CHARSET, javaCharset);
}
- } else {
- // assuming if there is no spec default, we always need to
- // add, I'm assuming
- description.setProperty(IContentDescription.CHARSET, javaCharset);
}
}
}
- // avoid adding anything if not absolutly needed, since always
- // "cached" per session
- // description.setProperty(IContentDescriptionExtended.ENCODING_MEMENTO,
- // encodingMemento);
+ returnResult = determineValidity(detector, returnResult);
+ return returnResult;
+ }
+
+ private int determineValidity(IResourceCharsetDetector detector, int returnResult) {
+ // we always expect XMLResourceEncodingDetector, but just to make safe
+ // cast.
+ if (detector instanceof XMLResourceEncodingDetector) {
+ XMLResourceEncodingDetector xmlResourceDetector = (XMLResourceEncodingDetector) detector;
+ if (xmlResourceDetector.isDeclDetected()) {
+ if (restrictedMode ) {
+ // if there is no initial whitespace, then platform's
+ // default one will do.
+ if (xmlResourceDetector.hasInitialWhiteSpace()) {
+ returnResult = IContentDescriber.VALID;
+ }
+ }
+ else {
+ returnResult = IContentDescriber.VALID;
+ }
+ }
+ }
+ return returnResult;
}
}
diff --git a/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/XMLResourceEncodingDetector.java b/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/XMLResourceEncodingDetector.java
index c010026b29..fdefa02dc5 100644
--- a/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/XMLResourceEncodingDetector.java
+++ b/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/XMLResourceEncodingDetector.java
@@ -20,6 +20,8 @@ import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;
public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetector implements IResourceCharsetDetector {
private XMLHeadTokenizer fTokenizer;
+ private boolean fDeclDetected = false;
+ private boolean fInitialWhiteSpace = false;
private boolean canHandleAsUnicodeStream(String tokenType) {
boolean canHandleAsUnicodeStream = false;
@@ -34,7 +36,8 @@ public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetecto
canHandleAsUnicodeStream = true;
String enc = "UTF-16BE"; //$NON-NLS-1$
createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
- } else if (tokenType == EncodingParserConstants.UTF16LE) {
+ }
+ else if (tokenType == EncodingParserConstants.UTF16LE) {
canHandleAsUnicodeStream = true;
String enc = "UTF-16"; //$NON-NLS-1$
createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
@@ -54,9 +57,9 @@ public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetecto
private XMLHeadTokenizer getTokenizer() {
// TODO: need to work on 'reset' in tokenizer, so new instance isn't
// always needed
- //if (fTokenizer == null) {
+ // if (fTokenizer == null) {
fTokenizer = new XMLHeadTokenizer();
- //}
+ // }
return fTokenizer;
}
@@ -75,9 +78,24 @@ public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetecto
do {
token = tokenizer.getNextToken();
tokenType = token.getType();
+
+ // handle xml content type detection
+ if (tokenType == XMLHeadTokenizerConstants.XMLDeclStart) {
+ fDeclDetected = true;
+ String declText = token.getText();
+ if (declText.startsWith("<?")) {
+ fInitialWhiteSpace = false;
+ }
+ else {
+ fInitialWhiteSpace = true;
+ }
+ }
+
+ // handle encoding detection
if (canHandleAsUnicodeStream(tokenType)) {
// side effect of canHandle is to create appropriate memento
- } else {
+ }
+ else {
if (tokenType == XMLHeadTokenizerConstants.XMLDelEncoding) {
if (tokenizer.hasMoreTokens()) {
token = tokenizer.getNextToken();
@@ -87,13 +105,21 @@ public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetecto
if (enc != null && enc.length() > 0) {
createEncodingMemento(enc, EncodingMemento.FOUND_ENCODING_IN_CONTENT);
}
-
}
}
}
}
- } while (tokenizer.hasMoreTokens());
+ }
+ while (tokenizer.hasMoreTokens());
+
+ }
+
+ public boolean isDeclDetected() {
+ return fDeclDetected;
+ }
+ public boolean hasInitialWhiteSpace() {
+ return fInitialWhiteSpace;
}
}

Back to the top