fixes for unit tests and content type

author: david_williams 2005-05-25 18:12:25 +0000
committer: david_williams 2005-05-25 18:12:25 +0000
commit: c226beefc62fb7d41df170d1a27e8559de7f0996 (patch)
tree: 21c2320a4a62a7a2e44415ec2ca7a085746e5cde /bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype
parent: 2d43e9749bb107cf3238114c0156c2681b4916c4 (diff)
download: webtools.sourceediting-c226beefc62fb7d41df170d1a27e8559de7f0996.tar.gz
webtools.sourceediting-c226beefc62fb7d41df170d1a27e8559de7f0996.tar.xz
webtools.sourceediting-c226beefc62fb7d41df170d1a27e8559de7f0996.zip
2 files changed, 121 insertions, 63 deletions
diff --git a/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/ContentDescriberForXML.java b/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/ContentDescriberForXML.java
index fdba1fb9cc..082ef4171f 100644
--- a/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/ContentDescriberForXML.java
+++ b/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/ContentDescriberForXML.java
@@ -27,6 +27,13 @@ import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;
 
 public final class ContentDescriberForXML implements ITextContentDescriber {
 	private final static QualifiedName[] SUPPORTED_OPTIONS = {IContentDescription.CHARSET, IContentDescription.BYTE_ORDER_MARK, IContentDescriptionExtended.DETECTED_CHARSET, IContentDescriptionExtended.UNSUPPORTED_CHARSET, IContentDescriptionExtended.APPROPRIATE_DEFAULT};
+	/**
+	 * <code>restrictedMode</code> is used just for testing/experiments. 
+	 * 
+	 *  If in restrictedMode, our "custom" contentType is seen as valid only in cases
+	 *  that the platform's standard one does not cover. 
+	 */
+	private boolean restrictedMode = false;
 
 	private IResourceCharsetDetector getDetector() {
 		return new XMLResourceEncodingDetector();
@@ -41,7 +48,7 @@ public final class ContentDescriberForXML implements ITextContentDescriber {
 	public int describe(InputStream contents, IContentDescription description) throws IOException {
 		int result = IContentDescriber.INDETERMINATE;
 
-		calculateSupportedOptions(contents, description);
+		result = calculateSupportedOptions(result, contents, description);
 
 		return result;
 	}
@@ -55,7 +62,7 @@ public final class ContentDescriberForXML implements ITextContentDescriber {
 	public int describe(Reader contents, IContentDescription description) throws IOException {
 		int result = IContentDescriber.INDETERMINATE;
 
-		calculateSupportedOptions(contents, description);
+		result = calculateSupportedOptions(result, contents, description);
 
 		return result;
 	}
@@ -70,12 +77,14 @@ public final class ContentDescriberForXML implements ITextContentDescriber {
 		return SUPPORTED_OPTIONS;
 	}
 
-	private void calculateSupportedOptions(InputStream contents, IContentDescription description) throws IOException {
+	private int calculateSupportedOptions(int result, InputStream contents, IContentDescription description) throws IOException {
+		int returnResult = result;
 		if (isRelevent(description)) {
 			IResourceCharsetDetector detector = getDetector();
 			detector.set(contents);
-			handleCalculations(description, detector);
+			returnResult = handleCalculations(result, description, detector);
 		}
+		return returnResult;
 	}
 
 	/**
@@ -83,12 +92,14 @@ public final class ContentDescriberForXML implements ITextContentDescriber {
 	 * @param description
 	 * @throws IOException
 	 */
-	private void calculateSupportedOptions(Reader contents, IContentDescription description) throws IOException {
+	private int calculateSupportedOptions(int result, Reader contents, IContentDescription description) throws IOException {
+		int returnResult = result;
 		if (isRelevent(description)) {
 			IResourceCharsetDetector detector = getDetector();
 			detector.set(contents);
-			handleCalculations(description, detector);
+			returnResult = handleCalculations(result, description, detector);
 		}
+		return returnResult;
 	}
 
 	private void handleDetectedSpecialCase(IContentDescription description, Object detectedCharset, Object javaCharset) {
@@ -118,7 +129,7 @@ public final class ContentDescriberForXML implements ITextContentDescriber {
 	private boolean isRelevent(IContentDescription description) {
 		boolean result = false;
 		if (description == null)
-			result = false;
+			result = true;
 		else if (description.isRequested(IContentDescription.BYTE_ORDER_MARK))
 			result = true;
 		else if (description.isRequested(IContentDescription.CHARSET))
@@ -140,66 +151,87 @@ public final class ContentDescriberForXML implements ITextContentDescriber {
 	 * @param detector
 	 * @throws IOException
 	 */
-	private void handleCalculations(IContentDescription description, IResourceCharsetDetector detector) throws IOException {
-		// note: if we're asked for one, we set them all. I need to be sure if
-		// called
-		// mulitiple times (one for each, say) that we don't waste time
-		// processing same
-		// content again.
+	private int handleCalculations(int result, IContentDescription description, IResourceCharsetDetector detector) throws IOException {
+		int returnResult = result;
 		EncodingMemento encodingMemento = detector.getEncodingMemento();
-		// TODO: I need to verify to see if this BOM work is always done
-		// by text type.
-		Object detectedByteOrderMark = encodingMemento.getUnicodeBOM();
-		if (detectedByteOrderMark != null) {
-			Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
-			// not sure why would ever be different, so if is different, may
-			// need to "push" up into base.
-			if (!detectedByteOrderMark.equals(existingByteOrderMark))
-				description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark);
-		}
+		if (description != null) {
+			// TODO: I need to verify to see if this BOM work is always done
+			// by text type.
+			Object detectedByteOrderMark = encodingMemento.getUnicodeBOM();
+			if (detectedByteOrderMark != null) {
+				Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
+				// not sure why it would ever be different, so if it
+				// is different, we may need to "push" it up
+				// into base.
+				if (!detectedByteOrderMark.equals(existingByteOrderMark))
+					description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark);
+			}
 
 
-		if (!encodingMemento.isValid()) {
-			// note: after setting here, its the mere presence of
-			// IContentDescriptionExtended.UNSUPPORTED_CHARSET
-			// in the resource's description that can be used to determine if
-			// invalid
-			// in those cases, the "detected" property contains an
-			// "appropriate default" to use.
-			description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding());
-			description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault());
-		}
+			if (!encodingMemento.isValid()) {
+				// note: after setting here, it's the mere presence of
+				// IContentDescriptionExtended.UNSUPPORTED_CHARSET
+				// in the resource's description that can be used to determine
+				// if invalid. In those cases, the "detected" property contains
+				// an "appropriate default" to use.
+				description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding());
+				description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault());
+			}
+
+			Object detectedCharset = encodingMemento.getDetectedCharsetName();
+			Object javaCharset = encodingMemento.getJavaCharsetName();
+
+			// we always include detected, if it's different from java
+			handleDetectedSpecialCase(description, detectedCharset, javaCharset);
 
-		Object detectedCharset = encodingMemento.getDetectedCharsetName();
-		Object javaCharset = encodingMemento.getJavaCharsetName();
-
-		// we always include detected, if its different than java
-		handleDetectedSpecialCase(description, detectedCharset, javaCharset);
-
-		if (javaCharset != null) {
-			Object existingCharset = description.getProperty(IContentDescription.CHARSET);
-			if (javaCharset.equals(existingCharset)) {
-				handleDetectedSpecialCase(description, detectedCharset, javaCharset);
-			} else {
-				// we may need to add what we found, but only need to add
-				// if different from default.the
-				Object defaultCharset = getDetector().getSpecDefaultEncoding();
-				if (defaultCharset != null) {
-					if (!defaultCharset.equals(javaCharset)) {
+			if (javaCharset != null) {
+				Object existingCharset = description.getProperty(IContentDescription.CHARSET);
+				if (javaCharset.equals(existingCharset)) {
+					handleDetectedSpecialCase(description, detectedCharset, javaCharset);
+				}
+				else {
+					// we may need to add what we found, but only need to add
+					// if different from the default
+					Object defaultCharset = getDetector().getSpecDefaultEncoding();
+					if (defaultCharset != null) {
+						if (!defaultCharset.equals(javaCharset)) {
+							description.setProperty(IContentDescription.CHARSET, javaCharset);
+						}
+					}
+					else {
+						// assuming if there is no spec default, we always
+						// need to add.
+						// TODO: this is probably a dead branch in current
+						// code, should re-examine for removal.
 						description.setProperty(IContentDescription.CHARSET, javaCharset);
 					}
-				} else {
-					// assuming if there is no spec default, we always need to
-					// add, I'm assuming
-					description.setProperty(IContentDescription.CHARSET, javaCharset);
 				}
 			}
 		}
 
-		// avoid adding anything if not absolutly needed, since always
-		// "cached" per session
-		// description.setProperty(IContentDescriptionExtended.ENCODING_MEMENTO,
-		// encodingMemento);
+		returnResult = determineValidity(detector, returnResult);
+		return returnResult;
+	}
+
+	private int determineValidity(IResourceCharsetDetector detector, int returnResult) {
+		// we always expect an XMLResourceEncodingDetector, but check the type
+		// first so the cast is safe.
+		if (detector instanceof XMLResourceEncodingDetector) {
+			XMLResourceEncodingDetector xmlResourceDetector = (XMLResourceEncodingDetector) detector;
+			if (xmlResourceDetector.isDeclDetected()) {
+				if (restrictedMode ) {
+					// if there is no initial whitespace, then the platform's
+					// default content type will do.
+					if (xmlResourceDetector.hasInitialWhiteSpace()) {
+						returnResult = IContentDescriber.VALID;
+					}
+				}
+				else {
+					returnResult = IContentDescriber.VALID;
+				}
+			}
+		}
+		return returnResult;
 	}
 
 }
diff --git a/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/XMLResourceEncodingDetector.java b/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/XMLResourceEncodingDetector.java
index c010026b29..fdefa02dc5 100644
--- a/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/XMLResourceEncodingDetector.java
+++ b/bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/XMLResourceEncodingDetector.java
@@ -20,6 +20,8 @@ import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;
 
 public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetector implements IResourceCharsetDetector {
 	private XMLHeadTokenizer fTokenizer;
+	private boolean fDeclDetected = false;
+	private boolean fInitialWhiteSpace = false;
 
 	private boolean canHandleAsUnicodeStream(String tokenType) {
 		boolean canHandleAsUnicodeStream = false;
@@ -34,7 +36,8 @@ public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetecto
 			canHandleAsUnicodeStream = true;
 			String enc = "UTF-16BE"; //$NON-NLS-1$
 			createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
-		} else if (tokenType == EncodingParserConstants.UTF16LE) {
+		}
+		else if (tokenType == EncodingParserConstants.UTF16LE) {
 			canHandleAsUnicodeStream = true;
 			String enc = "UTF-16"; //$NON-NLS-1$
 			createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
@@ -54,9 +57,9 @@ public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetecto
 	private XMLHeadTokenizer getTokenizer() {
 		// TODO: need to work on 'reset' in tokenizer, so new instance isn't
 		// always needed
-		//if (fTokenizer == null) {
+		// if (fTokenizer == null) {
 		fTokenizer = new XMLHeadTokenizer();
-		//}
+		// }
 		return fTokenizer;
 	}
 
@@ -75,9 +78,24 @@ public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetecto
 		do {
 			token = tokenizer.getNextToken();
 			tokenType = token.getType();
+
+			// handle xml content type detection
+			if (tokenType == XMLHeadTokenizerConstants.XMLDeclStart) {
+				fDeclDetected = true;
+				String declText = token.getText();
+				if (declText.startsWith("<?")) {
+					fInitialWhiteSpace = false;
+				}
+				else {
+					fInitialWhiteSpace = true;
+				}
+			}
+
+			// handle encoding detection
 			if (canHandleAsUnicodeStream(tokenType)) {
 				// side effect of canHandle is to create appropriate memento
-			} else {
+			}
+			else {
 				if (tokenType == XMLHeadTokenizerConstants.XMLDelEncoding) {
 					if (tokenizer.hasMoreTokens()) {
 						token = tokenizer.getNextToken();
@@ -87,13 +105,21 @@ public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetecto
 							if (enc != null && enc.length() > 0) {
 								createEncodingMemento(enc, EncodingMemento.FOUND_ENCODING_IN_CONTENT);
 							}
-
 						}
 					}
 				}
 			}
-		} while (tokenizer.hasMoreTokens());
+		}
+		while (tokenizer.hasMoreTokens());
+
+	}
+
+	public boolean isDeclDetected() {
+		return fDeclDetected;
+	}
 
+	public boolean hasInitialWhiteSpace() {
+		return fInitialWhiteSpace;
 	}
 
 }
author	david_williams	2005-05-25 18:12:25 +0000
committer	david_williams	2005-05-25 18:12:25 +0000
commit	c226beefc62fb7d41df170d1a27e8559de7f0996 (patch)
tree	21c2320a4a62a7a2e44415ec2ca7a085746e5cde /bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype
parent	2d43e9749bb107cf3238114c0156c2681b4916c4 (diff)
download	webtools.sourceediting-c226beefc62fb7d41df170d1a27e8559de7f0996.tar.gz webtools.sourceediting-c226beefc62fb7d41df170d1a27e8559de7f0996.tar.xz webtools.sourceediting-c226beefc62fb7d41df170d1a27e8559de7f0996.zip