Skip to main content
summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to 'bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HTMLResourceEncodingDetector.java')
-rw-r--r--bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HTMLResourceEncodingDetector.java187
1 files changed, 0 insertions, 187 deletions
diff --git a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HTMLResourceEncodingDetector.java b/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HTMLResourceEncodingDetector.java
deleted file mode 100644
index bff22197a9..0000000000
--- a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HTMLResourceEncodingDetector.java
+++ /dev/null
@@ -1,187 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2004 IBM Corporation and others.
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- * IBM Corporation - initial API and implementation
- *******************************************************************************/
-package org.eclipse.wst.html.core.internal.contenttype;
-
-import java.io.IOException;
-import java.util.regex.Pattern;
-
-import org.eclipse.wst.sse.core.internal.encoding.CodedIO;
-import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
-import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;
-import org.eclipse.wst.xml.core.internal.contenttype.EncodingParserConstants;
-
-public class HTMLResourceEncodingDetector extends AbstractResourceEncodingDetector implements IResourceCharsetDetector {
-
- private HTMLHeadTokenizer fTokenizer;
-
- /**
- * There is no spec defined encoding for HTML (historically), so null is
- * returned.
- */
- public String getSpecDefaultEncoding() {
- return null;
- }
-
- private boolean canHandleAsUnicodeStream(String tokenType) {
- boolean canHandleAsUnicodeStream = false;
- if (tokenType == EncodingParserConstants.UTF83ByteBOM) {
- canHandleAsUnicodeStream = true;
- String enc = "UTF-8"; //$NON-NLS-1$
- createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
- fEncodingMemento.setUTF83ByteBOMUsed(true);
- }
- else if (tokenType == EncodingParserConstants.UTF16BE) {
- canHandleAsUnicodeStream = true;
- String enc = "UTF-16BE"; //$NON-NLS-1$
- createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
- }
- else if (tokenType == EncodingParserConstants.UTF16LE) {
- canHandleAsUnicodeStream = true;
- String enc = "UTF-16"; //$NON-NLS-1$
- createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
- }
- return canHandleAsUnicodeStream;
- }
-
- /**
- * @return Returns the tokenizer.
- */
- private HTMLHeadTokenizer getTokenizer() {
- // TODO: need to work on 'reset' in tokenizer, so new instance isn't
- // always needed
- //if (fTokenizer == null) {
- fTokenizer = new HTMLHeadTokenizer();
- // }
- return fTokenizer;
- }
-
- private boolean isLegalString(String valueTokenType) {
- if (valueTokenType == null)
- return false;
- else
- return valueTokenType.equals(EncodingParserConstants.StringValue) || valueTokenType.equals(EncodingParserConstants.UnDelimitedStringValue) || valueTokenType.equals(EncodingParserConstants.InvalidTerminatedStringValue) || valueTokenType.equals(EncodingParserConstants.InvalidTermintatedUnDelimitedStringValue);
- }
-
- protected void parseInput() throws IOException {
- checkInContent();
- if (fEncodingMemento == null) {
- checkHeuristics();
- }
- }
-
- /**
- *
- */
- private void checkHeuristics() throws IOException {
- boolean noHeuristic = false;
- String heuristicEncoding = null;
- try {
- fReader.reset();
- fReader.mark(CodedIO.MAX_MARK_SIZE);
- byte[] bytes = new byte[CodedIO.MAX_MARK_SIZE];
- int nRead = 0;
- for (int i = 0; i < bytes.length; i++) {
- if (fReader.ready()) {
- int oneByte = fReader.read();
- nRead++;
- if (oneByte <= 0xFF) {
- bytes[i] = (byte) oneByte;
- }
- else {
- noHeuristic = true;
- break;
- }
- }
- else {
- break;
- }
- }
- if (!noHeuristic) {
- heuristicEncoding = EncodingGuesser.guessEncoding(bytes, nRead);
- }
- }
- catch (IOException e) {
- // if any IO exception, then not a heuristic case
- }
- finally {
- fReader.reset();
- }
- if (heuristicEncoding != null) {
- createEncodingMemento(heuristicEncoding, EncodingMemento.GUESSED_ENCODING_FROM_STREAM);
- }
-
- }
-
- private void checkInContent() throws IOException {
- HTMLHeadTokenizer tokenizer = getTokenizer();
- tokenizer.reset(fReader);
- HeadParserToken token = null;
- String tokenType = null;
- String contentTypeValue = null;
- do {
- token = tokenizer.getNextToken();
- tokenType = token.getType();
- if (canHandleAsUnicodeStream(tokenType)) {
- // side effect of canHandle is to create appropriate
- // memento
- }
- else if (tokenType == HTMLHeadTokenizerConstants.MetaTagContentType) {
- if (tokenizer.hasMoreTokens()) {
- HeadParserToken valueToken = tokenizer.getNextToken();
- String valueTokenType = valueToken.getType();
- if (isLegalString(valueTokenType)) {
- contentTypeValue = valueToken.getText();
-
- }
- }
- }
-
- }
- while (tokenizer.hasMoreTokens());
- if (contentTypeValue != null) {
- parseContentTypeValue(contentTypeValue);
- }
- }
-
- private void parseContentTypeValue(String contentType) {
- String charset = null;
- Pattern pattern = Pattern.compile(";\\s*charset\\s*=\\s*"); //$NON-NLS-1$
- String[] parts = pattern.split(contentType);
- if (parts.length > 0) {
- // if only one item, it can still be charset instead of
- // contentType
- if (parts.length == 1) {
- if (parts[0].length() > 6) {
- String checkForCharset = parts[0].substring(0, 7);
- if (checkForCharset.equalsIgnoreCase("charset")) { //$NON-NLS-1$
- int eqpos = parts[0].indexOf('=');
- eqpos = eqpos + 1;
- if (eqpos < parts[0].length()) {
- charset = parts[0].substring(eqpos);
- charset = charset.trim();
- }
- }
- }
- }
- else {
- //fContentType = parts[0];
- }
- }
- if (parts.length > 1) {
- charset = parts[1];
- }
-
- if (charset != null && charset.length() > 0) {
- createEncodingMemento(charset, EncodingMemento.FOUND_ENCODING_IN_CONTENT);
- }
- }
-
-} \ No newline at end of file

Back to the top