Skip to main content

This CGIT instance is deprecated, and repositories have been moved to Gitlab or Github. See the repository descriptions for specific locations.

summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to 'bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype')
-rw-r--r--bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/AbstractResourceEncodingDetector.java261
-rw-r--r--bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/ByteReader.java109
-rw-r--r--bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/ContentDescriberForHTML.java231
-rw-r--r--bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/EncodingGuesser.java173
-rw-r--r--bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HTMLHeadTokenizer.java1690
-rw-r--r--bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HTMLHeadTokenizerConstants.java20
-rw-r--r--bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HTMLResourceEncodingDetector.java187
-rw-r--r--bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HeadParserToken.java44
-rw-r--r--bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/IntStack.java99
-rw-r--r--bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/NullMemento.java37
10 files changed, 0 insertions, 2851 deletions
diff --git a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/AbstractResourceEncodingDetector.java b/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/AbstractResourceEncodingDetector.java
deleted file mode 100644
index 66b6bc2b66..0000000000
--- a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/AbstractResourceEncodingDetector.java
+++ /dev/null
@@ -1,261 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2001, 2004 IBM Corporation and others.
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- * IBM Corporation - initial API and implementation
- * Jens Lukowski/Innoopract - initial renaming/restructuring
- *
- *******************************************************************************/
-package org.eclipse.wst.html.core.internal.contenttype;
-
-import java.io.BufferedInputStream;
-import java.io.BufferedReader;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.Reader;
-import java.nio.charset.Charset;
-import java.nio.charset.IllegalCharsetNameException;
-import java.nio.charset.UnsupportedCharsetException;
-
-import org.eclipse.core.resources.IStorage;
-import org.eclipse.core.runtime.CoreException;
-import org.eclipse.wst.sse.core.internal.encoding.CodedIO;
-import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
-import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;
-
-
-public abstract class AbstractResourceEncodingDetector implements IResourceCharsetDetector {
-
- protected EncodingMemento fEncodingMemento;
-
- protected boolean fHeaderParsed;
-
- protected Reader fReader;
-
- /**
- *
- */
- public AbstractResourceEncodingDetector() {
- super();
- }
-
- /**
- * Note: once this instance is created, trace info still needs to be
- * appended by caller, depending on the context its created.
- */
- private void createEncodingMemento(String detectedCharsetName) {
- fEncodingMemento = new EncodingMemento();
- fEncodingMemento.setJavaCharsetName(getAppropriateJavaCharset(detectedCharsetName));
- fEncodingMemento.setDetectedCharsetName(detectedCharsetName);
- // TODO: if detectedCharset and spec default is
- // null, need to use "work
- // bench based" defaults.
- fEncodingMemento.setAppropriateDefault(getSpecDefaultEncoding());
- }
-
- /**
- * convience method all subclasses can use (but not override)
- *
- * @param detectedCharsetName
- * @param reason
- */
- final protected void createEncodingMemento(String detectedCharsetName, String reason) {
- createEncodingMemento(detectedCharsetName);
- }
-
- /**
- * convience method all subclasses can use (but not override)
- */
- final protected void ensureInputSet() {
- if (fReader == null) {
- throw new IllegalStateException("input must be set before use"); //$NON-NLS-1$
- }
- }
-
- /**
- * This method can return null, if invalid charset name (in which case
- * "appropriateDefault" should be used, if a name is really need for some
- * "save anyway" cases).
- *
- * @param detectedCharsetName
- * @return
- */
- private String getAppropriateJavaCharset(String detectedCharsetName) {
- String result = null;
- // 1. Check explicit mapping overrides from
- // property file -- its here we pick up "rules" for cases
- // that are not even in Java
- result = CodedIO.checkMappingOverrides(detectedCharsetName);
- // 2. Use the "canonical" name from JRE mappings
- // Note: see Charset JavaDoc, the name you get one
- // with can be alias,
- // the name you get back is "standard" name.
- Charset javaCharset = null;
- try {
- javaCharset = Charset.forName(detectedCharsetName);
- }
- catch (UnsupportedCharsetException e) {
- // only set invalid, if result is same as detected -- they won't
- // be equal if
- // overridden
- if (result != null && result.equals(detectedCharsetName)) {
- fEncodingMemento.setInvalidEncoding(detectedCharsetName);
- }
- }
- catch (IllegalCharsetNameException e) {
- // only set invalid, if result is same as detected -- they won't
- // be equal if
- // overridden
- if (result != null && result.equals(detectedCharsetName)) {
- fEncodingMemento.setInvalidEncoding(detectedCharsetName);
- }
- }
- // give priority to java cononical name, if present
- if (javaCharset != null) {
- result = javaCharset.name();
- // but still allow overrides
- result = CodedIO.checkMappingOverrides(result);
- }
- return result;
- }
-
- public String getEncoding() throws IOException {
- return getEncodingMemento().getDetectedCharsetName();
- }
-
- // to ensure consist overall rules used, we'll mark as
- // final,
- // and require subclasses to provide certain pieces of
- // the
- // implementation
- public EncodingMemento getEncodingMemento() throws IOException {
- ensureInputSet();
- if (!fHeaderParsed) {
- parseInput();
- // we keep track of if header's already been
- // parse, so can make
- // multiple 'get' calls, without causing
- // reparsing.
- fHeaderParsed = true;
- // Note: there is a "hidden assumption" here
- // that an empty
- // string in content should be treated same as
- // not present.
- }
- if (fEncodingMemento == null) {
- handleSpecDefault();
- }
- if (fEncodingMemento == null) {
- // safty net
- fEncodingMemento = new NullMemento();
- }
- return fEncodingMemento;
- }
-
- /**
- * This is to return a default encoding -- as specified by an industry
- * content type spec -- when not present in the stream, for example, XML
- * specifies UTF-8, JSP specifies ISO-8859-1. This method should return
- * null if there is no such "spec default".
- */
- abstract public String getSpecDefaultEncoding();
-
- public EncodingMemento getSpecDefaultEncodingMemento() {
- resetAll();
- EncodingMemento result = null;
- String enc = getSpecDefaultEncoding();
- if (enc != null) {
- createEncodingMemento(enc, EncodingMemento.DEFAULTS_ASSUMED_FOR_EMPTY_INPUT);
- fEncodingMemento.setAppropriateDefault(enc);
- result = fEncodingMemento;
- }
- return result;
- }
-
- private void handleSpecDefault() {
- String encodingName;
- encodingName = getSpecDefaultEncoding();
- if (encodingName != null) {
- // createEncodingMemento(encodingName,
- // EncodingMemento.USED_CONTENT_TYPE_DEFAULT);
- fEncodingMemento = new EncodingMemento();
- fEncodingMemento.setJavaCharsetName(encodingName);
- fEncodingMemento.setAppropriateDefault(encodingName);
- }
- }
-
- /**
- * Every subclass must provide a way to parse the input. This method has
- * several critical responsibilities:
- * <li>set the fEncodingMemento field appropriately, according to the
- * results of the parse of fReader.</li>
- * <li>set fHarderParsed to true, to avoid wasted re-parsing.</li>
- */
- abstract protected void parseInput() throws IOException;
-
- /**
- *
- */
- private void resetAll() {
- fReader = null;
- fHeaderParsed = false;
- fEncodingMemento = null;
- }
-
- /**
- *
- */
- public void set(InputStream inputStream) {
- resetAll();
- fReader = new ByteReader(inputStream);
- try {
- fReader.mark(CodedIO.MAX_MARK_SIZE);
- }
- catch (IOException e) {
- // impossible, since we know ByteReader
- // supports marking
- throw new Error(e);
- }
- }
-
- /**
- *
- */
- public void set(IStorage iStorage) throws CoreException {
- resetAll();
- InputStream inputStream = iStorage.getContents();
- InputStream resettableStream = new BufferedInputStream(inputStream, CodedIO.MAX_BUF_SIZE);
- resettableStream.mark(CodedIO.MAX_MARK_SIZE);
- set(resettableStream);
- // TODO we'll need to "remember" IFile, or
- // get its (or its project's) settings, in case
- // those are needed to handle cases when the
- // encoding is not in the file stream.
- }
-
- /**
- * Note: this is not part of interface to help avoid confusion ... it
- * expected this Reader is a well formed character reader ... that is, its
- * all ready been determined to not be a unicode marked input stream. And,
- * its assumed to be in the correct position, at position zero, ready to
- * read first character.
- */
- public void set(Reader reader) {
- resetAll();
- fReader = reader;
- if (!fReader.markSupported()) {
- fReader = new BufferedReader(fReader);
- }
- try {
- fReader.mark(CodedIO.MAX_MARK_SIZE);
- }
- catch (IOException e) {
- // impossble, since we just checked if markable
- throw new Error(e);
- }
- }
-}
diff --git a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/ByteReader.java b/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/ByteReader.java
deleted file mode 100644
index bfca51af2a..0000000000
--- a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/ByteReader.java
+++ /dev/null
@@ -1,109 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2001, 2004 IBM Corporation and others.
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- * IBM Corporation - initial API and implementation
- * Jens Lukowski/Innoopract - initial renaming/restructuring
- *
- *******************************************************************************/
-package org.eclipse.wst.html.core.internal.contenttype;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.Reader;
-
-import org.eclipse.wst.sse.core.internal.encoding.CodedIO;
-
-/**
- * This is an "adapter" class, simply to get in input stream to act like a
- * reader. We could not use InputStreamReader directly because its internal
- * buffers are not controllable, and it sometimes pulls too much out of input
- * stream (even when it wasn't needed for our purposes).
- *
- * The use of this class is highly specialized and by not means meant to be
- * general purpose. Its use is restricted to those cases where the input
- * stream can be regarded as ascii just long enough to determine what the real
- * encoding should be.
- */
-
-public class ByteReader extends Reader {
-
-
- public static final int DEFAULT_BUFFER_SIZE = CodedIO.MAX_BUF_SIZE;
-
- protected byte[] fBuffer;
-
- protected InputStream fInputStream;
-
- protected ByteReader() {
- super();
- }
-
- public ByteReader(InputStream inputStream) {
- this(inputStream, DEFAULT_BUFFER_SIZE);
- if (!inputStream.markSupported()) {
- throw new IllegalArgumentException("ByteReader is required to have a resettable stream"); //$NON-NLS-1$
- }
- }
-
- public ByteReader(InputStream inputStream, int size) {
- this.fInputStream = inputStream;
- if (!inputStream.markSupported()) {
- throw new IllegalArgumentException("ByteReader is required to have a resettable stream"); //$NON-NLS-1$
- }
- this.fBuffer = new byte[size];
-
- }
-
- public void close() throws IOException {
- this.fInputStream.close();
- }
-
- public void mark(int readAheadLimit) {
- this.fInputStream.mark(readAheadLimit);
- }
-
- public boolean markSupported() {
- return true;
- }
-
- public int read() throws IOException {
- int b0 = this.fInputStream.read();
- return (b0 & 0x00FF);
- }
-
- public int read(char ch[], int offset, int length) throws IOException {
- if (length > this.fBuffer.length) {
- length = this.fBuffer.length;
- }
-
- int count = this.fInputStream.read(this.fBuffer, 0, length);
-
- for (int i = 0; i < count; i++) {
- int b0 = this.fBuffer[i];
- // the 0x00FF is to "lose" the negative bits filled in the byte to
- // int conversion
- // (and which would be there if cast directly from byte to char).
- char c0 = (char) (b0 & 0x00FF);
- ch[offset + i] = c0;
- }
- return count;
- }
-
- public boolean ready() throws IOException {
- return this.fInputStream.available() > 0;
- }
-
- public void reset() throws IOException {
- this.fInputStream.reset();
- }
-
- public long skip(long n) throws IOException {
- return this.fInputStream.skip(n);
- }
-
-}
diff --git a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/ContentDescriberForHTML.java b/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/ContentDescriberForHTML.java
deleted file mode 100644
index 376d2f8793..0000000000
--- a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/ContentDescriberForHTML.java
+++ /dev/null
@@ -1,231 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2004 IBM Corporation and others.
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- * IBM Corporation - initial API and implementation
- *******************************************************************************/
-package org.eclipse.wst.html.core.internal.contenttype;
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.Reader;
-
-import org.eclipse.core.runtime.QualifiedName;
-import org.eclipse.core.runtime.content.IContentDescriber;
-import org.eclipse.core.runtime.content.IContentDescription;
-import org.eclipse.core.runtime.content.ITextContentDescriber;
-import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
-import org.eclipse.wst.sse.core.internal.encoding.IContentDescriptionExtended;
-import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;
-
-/**
- *
- * ContentDescriberForHTML
- *
- * A few design principles to remember with content describers:
- * <ul>
- * <li>Remember not to store values/data in the descriptions array of properties,
- * especially not large objects! and even no value that is already the default value,
- * since those description properties are cached per session, so can add up in memory.
- * <li>Remember that a ContentDescriber instance becomes a "root object" in the
- * ContentDescriberManager (that is, always in memory, never GC'd), so it should
- * not have any instance or state data since it would always become stale and
- * "hold on" to objects unneccessarily.
- * </ul>
- */
-
-public final class ContentDescriberForHTML implements ITextContentDescriber {
-
- final private static QualifiedName[] SUPPORTED_OPTIONS = {IContentDescription.CHARSET, IContentDescription.BYTE_ORDER_MARK, IContentDescriptionExtended.DETECTED_CHARSET, IContentDescriptionExtended.UNSUPPORTED_CHARSET, IContentDescriptionExtended.APPROPRIATE_DEFAULT};
-
- public int describe(InputStream contents, IContentDescription description) throws IOException {
- int result = IContentDescriber.INDETERMINATE;
-
- if (description == null) {
- result = computeValidity(contents);
- }
- else {
- calculateSupportedOptions(contents, description);
- // assummming we should return same 'validity' value we did
- // when called before. (technically, could be a performance issue
- // in future, so might want to check if any 'ol value would
- // be ok here.
- result = computeValidity(contents);
- }
-
- return result;
- }
-
- public int describe(Reader contents, IContentDescription description) throws IOException {
- int result = IContentDescriber.INDETERMINATE;
-
- if (description == null) {
- result = computeValidity(contents);
- }
- else {
- calculateSupportedOptions(contents, description);
- // assummming we should return same 'validity' value we did
- // when called before. (technically, could be a performance issue
- // in future, so might want to check if hard coded 'valid' would
- // be ok here.
- result = computeValidity(contents);
- }
-
- return result;
- }
-
- public QualifiedName[] getSupportedOptions() {
-
- return SUPPORTED_OPTIONS;
- }
-
- private void calculateSupportedOptions(InputStream contents, IContentDescription description) throws IOException {
- if (isRelevent(description)) {
- IResourceCharsetDetector detector = getDetector();
- detector.set(contents);
- handleCalculations(description, detector);
- }
- }
-
- /**
- * @param contents
- * @param description
- * @throws IOException
- */
- private void calculateSupportedOptions(Reader contents, IContentDescription description) throws IOException {
- if (isRelevent(description)) {
- IResourceCharsetDetector detector = getDetector();
- detector.set(contents);
- handleCalculations(description, detector);
- }
- }
-
- private int computeValidity(InputStream inputStream) {
- // currently no contents specific check for valid HTML contents
- // (this may change once we add XHTML content type)
- return IContentDescriber.INDETERMINATE;
- }
-
- private int computeValidity(Reader reader) {
- // currently no contents specific check for valid HTML contents
- // (this may change once we add XHTML content type)
- return IContentDescriber.INDETERMINATE;
- }
-
- private IResourceCharsetDetector getDetector() {
-
- return new HTMLResourceEncodingDetector();
-
- }
-
- /**
- * @param description
- * @param detector
- * @throws IOException
- */
- private void handleCalculations(IContentDescription description, IResourceCharsetDetector detector) throws IOException {
-
- EncodingMemento encodingMemento = ((HTMLResourceEncodingDetector) detector).getEncodingMemento();
- // TODO: I need to verify to see if this BOM work is always done
- // by text type.
- Object detectedByteOrderMark = encodingMemento.getUnicodeBOM();
- if (detectedByteOrderMark != null) {
- Object existingByteOrderMark = description.getProperty(IContentDescription.BYTE_ORDER_MARK);
- // not sure why would ever be different, so if is different, may
- // need to "push" up into base.
- if (!detectedByteOrderMark.equals(existingByteOrderMark))
- description.setProperty(IContentDescription.BYTE_ORDER_MARK, detectedByteOrderMark);
- }
-
-
- if (!encodingMemento.isValid()) {
- /*
- * note: after setting here, its the mere presence of
- * IContentDescriptionExtended.UNSUPPORTED_CHARSET in the
- * resource's description that can be used to determine if invalid
- * in those cases, the "detected" property contains an
- * "appropriate default" to use.
- */
- description.setProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET, encodingMemento.getInvalidEncoding());
- description.setProperty(IContentDescriptionExtended.APPROPRIATE_DEFAULT, encodingMemento.getAppropriateDefault());
- }
-
- Object detectedCharset = encodingMemento.getDetectedCharsetName();
- Object javaCharset = encodingMemento.getJavaCharsetName();
-
- // we always include detected, if its different than java
- handleDetectedSpecialCase(description, detectedCharset, javaCharset);
-
- if (javaCharset != null) {
- Object existingCharset = description.getProperty(IContentDescription.CHARSET);
- if (javaCharset.equals(existingCharset)) {
- handleDetectedSpecialCase(description, detectedCharset, javaCharset);
- }
- else {
- // we may need to add what we found, but only need to add
- // if different from default.the
- Object defaultCharset = detector.getSpecDefaultEncoding();
- if (defaultCharset != null) {
- if (!defaultCharset.equals(javaCharset)) {
- description.setProperty(IContentDescription.CHARSET, javaCharset);
- }
- }
- else {
- // assuming if there is no spec default, we always need to
- // add, I'm assuming
- description.setProperty(IContentDescription.CHARSET, javaCharset);
- }
- }
- }
-
- }
-
- private void handleDetectedSpecialCase(IContentDescription description, Object detectedCharset, Object javaCharset) {
- // since equal, we don't need to add, but if our detected version is
- // different than
- // javaCharset, then we should add it. This will happen, for example,
- // if there's
- // differences in case, or differences due to override properties
- if (detectedCharset != null) {
- // if (!detectedCharset.equals(javaCharset)) {
- // description.setProperty(IContentDescriptionExtended.DETECTED_CHARSET,
- // detectedCharset);
- // }
-
- // Once we detected a charset, we should set the property even
- // though it's the same as javaCharset
- // because there are clients that rely on this property to
- // determine if the charset is actually detected in file or not.
- description.setProperty(IContentDescriptionExtended.DETECTED_CHARSET, detectedCharset);
- }
- }
-
- /**
- * @param description
- * @return
- */
- private boolean isRelevent(IContentDescription description) {
- boolean result = false;
- if (description == null)
- result = false;
- else if (description.isRequested(IContentDescription.BYTE_ORDER_MARK))
- result = true;
- else if (description.isRequested(IContentDescription.CHARSET))
- result = true;
- else if (description.isRequested(IContentDescriptionExtended.APPROPRIATE_DEFAULT))
- result = true;
- else if (description.isRequested(IContentDescriptionExtended.DETECTED_CHARSET))
- result = true;
- else if (description.isRequested(IContentDescriptionExtended.UNSUPPORTED_CHARSET))
- result = true;
- // else if
- // (description.isRequested(IContentDescriptionExtended.ENCODING_MEMENTO))
- // result = true;
- return result;
- }
-
-} \ No newline at end of file
diff --git a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/EncodingGuesser.java b/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/EncodingGuesser.java
deleted file mode 100644
index be7f3a05eb..0000000000
--- a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/EncodingGuesser.java
+++ /dev/null
@@ -1,173 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2001, 2004 IBM Corporation and others.
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- * IBM Corporation - initial API and implementation
- * Jens Lukowski/Innoopract - initial renaming/restructuring
- *
- *******************************************************************************/
-package org.eclipse.wst.html.core.internal.contenttype;
-
-/**
- *
- * This is ported from PageDesigner's hpbcom/Kanji.cpp's
- * Kanji::guess_kanji_code(),
- *
- */
-public class EncodingGuesser {
- private static final int ASCII = 0; // ASCII
- // ISO-2022-JP
- private static final int ASCII_IN = 8; // This is after ISO2022's change
- // Shift-JIS
- private static final int EUC_HALFKANA = 6; // This is Half Kana in EUC-JP
- private static final int EUC_JP = 3; // This is EUC-JP
- private static final int ISO2022_JP = 4; // This is ISO-2022-JP
- private static final int JIS_HALFKANA = 7; // THis is Half Kana in
- private static final byte KT_EUC1 = 0x40;
- private static final byte KT_EUC2 = (byte) 0x80;
- // ASCII
- private static final byte KT_JIN = 0x01;
- private static final byte KT_JOUT = 0x02;
- // private static final byte KT_ESC = 0x04;
- // private static final byte KT_JIS = 0x08;
- private static final byte KT_SFT1 = 0x10;
- private static final byte KT_SFT2 = 0x20;
- private static final byte ktype[] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 00 */
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 10 */
- 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x08, 0x08, 0x09, 0x08, 0x08, 0x08, /* !"#$%&' *//* " */
- 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, /* ()*+,-./ */
- 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, /* 01234567 */
- 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, 0x08, /* 89:; <=>? */
- 0x29, 0x28, 0x2b, 0x28, 0x28, 0x28, 0x28, 0x28, /* @ABCDEFG */
- 0x2a, 0x28, 0x2a, 0x28, 0x28, 0x28, 0x28, 0x28, /* HIJKLMNO */
- 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, /* PQRSTUVW */
- 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, /* XYZ[\]^_ */
- 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, /* abcdefg */
- 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, /* hijklmno */
- 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, /* pqrstuvw */
- 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x28, 0x00, /* xyz{|}~ */
- 0x20, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, /* 80 */
- 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, /* 90 */
- 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x30, 0x20, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, /* A0 */
- (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, /* B0 */
- (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, /* C0 */
- (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, /* D0 */
- (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xe0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, /* E0 */
- (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, /* F0 */
- (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xf0, (byte) 0xc0, (byte) 0xc0, 0x00,};
- // private static final int ISO8859_1 = 1; // ISO-1
- private static final int SHIFT_JIS = 2; // This is Shift-JIS
- private static final int SJIS_HALFKANA = 5; // This is Half Kana in
-
- /**
- * Currently, only Japanese encodings are supported.
- */
- private static final int UNKNOWN = -1; // Unknown
-
- /**
- * @return java.lang.String
- * @param code
- * int
- *
- * Convert private int to IANA Encoding name.
- */
- private static String convertToIANAEncodingName(int code) {
- String encoding = null;
-
- switch (code) {
- case SHIFT_JIS :
- case SJIS_HALFKANA :
- encoding = "Shift_JIS";//$NON-NLS-1$
- break;
- case EUC_JP :
- case EUC_HALFKANA :
- encoding = "EUC-JP";//$NON-NLS-1$
- break;
- case ISO2022_JP :
- case JIS_HALFKANA :
- encoding = "ISO-2022-JP";//$NON-NLS-1$
- default :
- break;
- }
-
- return encoding;
- }
-
- /**
- * Return guessed Java Encoding name target: bytes to be inspected length:
- * length of target
- */
- public static String guessEncoding(byte[] target, int length) {
- int code = UNKNOWN;
-
- // Currently, only Japanese is supported.
- String system_ctype = java.util.Locale.getDefault().getLanguage();
- String jp_ctype = java.util.Locale.JAPANESE.getLanguage();
- if (system_ctype.compareTo(jp_ctype) == 0) {
- // Ok, I'm under ja_JP.
- code = ASCII;
- int pos = 0;
- while ((code == ASCII) && (length > 0)) {
- int ch1 = target[pos];
- ch1 = ch1 & 0x000000FF;
- int ch2 = (length >= 2) ? target[pos + 1] : 0;
- ch2 = ch2 & 0x000000FF;
- int ch3 = (length >= 3) ? target[pos + 2] : 0;
- ch3 = ch3 & 0x000000FF;
- code = guessJapaneseKanjiCode(ch1, ch2, ch3, 0);
- pos++;
- length--;
- }
- switch (code) {
- case ISO2022_JP :
- case JIS_HALFKANA :
- code = ISO2022_JP;
- break;
- case EUC_JP :
- code = EUC_JP;
- break;
- default :
- code = SHIFT_JIS;
- }
- }
- return (convertToIANAEncodingName(code));
- }
-
- /**
- * Guess the encoding. halfkana_flag = 0x01 ( detect SJIS half kana )
- * halfkana_flag = 0x02 ( detect EUC half kana )
- */
- private static int guessJapaneseKanjiCode(int ch1, int ch2, int ch3, int halfkana_flag) {
- boolean sjis_hankaku_flag = ((halfkana_flag & 0x01) != 0) ? true : false;
- boolean euc_hankaku_flag = ((halfkana_flag & 0x02) != 0) ? true : false;
-
- if (ch1 == 0)
- return UNKNOWN;
- if (sjis_hankaku_flag && ch1 >= 0xa1 && ch1 <= 0xdf)
- return SJIS_HALFKANA;
- else if (euc_hankaku_flag && ch1 == 0x8e && ch2 >= 0xa1 && ch2 <= 0xdf)
- return EUC_HALFKANA;
- else if (((ktype[ch1] & KT_SFT1) != 0) && ((ktype[ch2] & KT_SFT2) != 0))
- return SHIFT_JIS;
- else if (((ktype[ch1] & KT_EUC1) != 0) && ((ktype[ch2] & KT_EUC2) != 0))
- return EUC_JP;
- else if (ch1 == 0x1b && ((ktype[ch2] & KT_JIN) != 0))
- return ISO2022_JP;
- else if (ch1 >= 0xa1 && ch1 <= 0xdf)
- return SJIS_HALFKANA;
- else if (ch1 == 0x1b && ch2 == 0x28/* '(' */&& ch3 == 0x49/* 'I' */)
- return JIS_HALFKANA;
- else if (ch1 == 0x1b && ch2 == 0x28/* '(' */&& ((ktype[ch3] & KT_JOUT) != 0))
- return ASCII_IN;
-
- return ASCII;
- }
-
- public EncodingGuesser() {
- super();
- }
-}
diff --git a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HTMLHeadTokenizer.java b/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HTMLHeadTokenizer.java
deleted file mode 100644
index dc68f674ca..0000000000
--- a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HTMLHeadTokenizer.java
+++ /dev/null
@@ -1,1690 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2004 IBM Corporation and others.
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- * IBM Corporation - initial API and implementation
- *******************************************************************************/
-/* The following code was generated by JFlex 1.2.2 on 1/27/04 6:41 PM */
-
-/*nlsXXX*/
-package org.eclipse.wst.html.core.internal.contenttype;
-
-import java.io.IOException;
-import java.io.Reader;
-
-import org.eclipse.wst.xml.core.internal.contenttype.EncodingParserConstants;
-import org.eclipse.wst.xml.core.internal.contenttype.XMLHeadTokenizerConstants;
-
-
-
-/**
- * This class is a scanner generated by
- * <a href="http://www.informatik.tu-muenchen.de/~kleing/jflex/">JFlex</a> 1.2.2
- * on 1/27/04 6:41 PM from the specification file
- * <tt>file:/D:/DevTimeSupport/HeadParsers/HTMLHeadTokenizer/HTMLHeadTokenizer.jflex</tt>
- */
-public class HTMLHeadTokenizer {
-
- /** this character denotes the end of file */
- final public static int YYEOF = -1;
-
- /** lexical states */
- final public static int ST_META_TAG = 4;
- final public static int YYINITIAL = 0;
- final public static int UnDelimitedString = 12;
- final public static int DQ_STRING = 8;
- final public static int SQ_STRING = 10;
- final public static int ST_XMLDecl = 2;
- final public static int QuotedAttributeValue = 6;
-
- /**
- * YY_LEXSTATE[l] is the state in the DFA for the lexical state l
- * YY_LEXSTATE[l+1] is the state in the DFA for the lexical state l
- * at the beginning of a line
- * l is of the form l = 2*k, k a non negative integer
- */
- private final static int YY_LEXSTATE[] = {0, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7};
-
- /**
- * Translates characters to character classes
- */
- final private static String yycmap_packed = "\11\0\1\6\1\7\2\0\1\11\22\0\1\22\1\0\1\40\2\0" + "\1\44\1\0\1\43\5\0\1\34\1\0\1\42\13\0\1\45\1\12" + "\1\10\1\31\1\13\1\0\1\21\1\0\1\24\1\26\1\17\1\0" + "\1\30\1\32\1\27\2\0\1\16\1\15\1\23\1\25\1\33\1\35" + "\2\0\1\20\1\36\1\37\1\0\1\14\1\41\7\0\1\21\1\0" + "\1\24\1\26\1\17\1\0\1\30\1\32\1\27\2\0\1\16\1\15" + "\1\23\1\25\1\33\1\35\2\0\1\20\1\36\1\37\1\0\1\14" + "\1\41\101\0\1\4\3\0\1\5\17\0\1\3\16\0\1\1\20\0" + "\1\3\16\0\1\1\1\2\170\0\1\2\ufe87\0";
-
- /**
- * Translates characters to character classes
- */
- final private static char[] yycmap = yy_unpack_cmap(yycmap_packed);
-
-
- /* error codes */
- final private static int YY_UNKNOWN_ERROR = 0;
- final private static int YY_ILLEGAL_STATE = 1;
- final private static int YY_NO_MATCH = 2;
- final private static int YY_PUSHBACK_2BIG = 3;
-
- /* error messages for the codes above */
- final private static String YY_ERROR_MSG[] = {"Unkown internal scanner error", "Internal error: unknown state", "Error: could not match input", "Error: pushback value was too large"};
-
- /** the input device */
- private java.io.Reader yy_reader;
-
- /** the current state of the DFA */
- private int yy_state;
-
- /** the current lexical state */
- private int yy_lexical_state = YYINITIAL;
-
- /** this buffer contains the current text to be matched and is
- the source of the yytext() string */
- private char yy_buffer[] = new char[16384];
-
- /** the textposition at the last accepting state */
- private int yy_markedPos;
-
- /** the textposition at the last state to be included in yytext */
- private int yy_pushbackPos;
-
- /** the current text position in the buffer */
- private int yy_currentPos;
-
- /** startRead marks the beginning of the yytext() string in the buffer */
- private int yy_startRead;
-
- /** endRead marks the last character in the buffer, that has been read
- from input */
- private int yy_endRead;
-
-
- /** the number of characters up to the start of the matched text */
- private int yychar;
-
-
- /**
- * yy_atBOL == true <=> the scanner is currently at the beginning of a line
- */
- private boolean yy_atBOL;
-
- /** yy_atEOF == true <=> the scanner has returned a value for EOF */
- private boolean yy_atEOF;
-
- /** denotes if the user-EOF-code has already been executed */
- private boolean yy_eof_done;
-
- /* user code: */
-
-
- private boolean hasMore = true;
- private final static int MAX_TO_SCAN = 8000;
- StringBuffer string = new StringBuffer();
- // state stack for easier state handling
- private IntStack fStateStack = new IntStack();
- private String valueText = null;
- boolean foundContentTypeValue = false;
-
-
-
- public HTMLHeadTokenizer() {
- super();
- }
-
- public void reset(Reader in) {
- /* the input device */
- yy_reader = in;
-
- /* the current state of the DFA */
- yy_state = 0;
-
- /* the current lexical state */
- yy_lexical_state = YYINITIAL;
-
- /* this buffer contains the current text to be matched and is
- the source of the yytext() string */
- java.util.Arrays.fill(yy_buffer, (char) 0);
-
- /* the textposition at the last accepting state */
- yy_markedPos = 0;
-
- /* the textposition at the last state to be included in yytext */
- yy_pushbackPos = 0;
-
- /* the current text position in the buffer */
- yy_currentPos = 0;
-
- /* startRead marks the beginning of the yytext() string in the buffer */
- yy_startRead = 0;
-
- /**
- * endRead marks the last character in the buffer, that has been read
- * from input
- */
- yy_endRead = 0;
-
-
- /* the number of characters up to the start of the matched text */
- yychar = 0;
-
-
- /**
- * yy_atBOL == true <=> the scanner is currently at the beginning
- * of a line
- */
- yy_atBOL = false;
-
- /* yy_atEOF == true <=> the scanner has returned a value for EOF */
- yy_atEOF = false;
-
- /* denotes if the user-EOF-code has already been executed */
- yy_eof_done = false;
-
-
- fStateStack.clear();
-
- hasMore = true;
-
- // its a little wasteful to "throw away" first char array generated
- // by class init (via auto generated code), but we really do want
- // a small buffer for our head parsers.
- if (yy_buffer.length != MAX_TO_SCAN) {
- yy_buffer = new char[MAX_TO_SCAN];
- }
-
-
- }
-
-
- public final HeadParserToken getNextToken() throws IOException {
- String context = null;
- context = primGetNextToken();
- HeadParserToken result = null;
- if (valueText != null) {
- result = createToken(context, yychar, valueText);
- valueText = null;
- }
- else {
- result = createToken(context, yychar, yytext());
- }
- return result;
- }
-
- public final boolean hasMoreTokens() {
- return hasMore && yychar < MAX_TO_SCAN;
- }
-
- private void pushCurrentState() {
- fStateStack.push(yystate());
-
- }
-
- private void popState() {
- yybegin(fStateStack.pop());
- }
-
- private HeadParserToken createToken(String context, int start, String text) {
- return new HeadParserToken(context, start, text);
- }
-
-
-
- /**
- * Creates a new scanner
- * There is also a java.io.InputStream version of this constructor.
- *
- * @param in the java.io.Reader to read input from.
- */
- public HTMLHeadTokenizer(java.io.Reader in) {
- this.yy_reader = in;
- }
-
- /**
- * Creates a new scanner.
- * There is also java.io.Reader version of this constructor.
- *
- * @param in the java.io.Inputstream to read input from.
- */
- public HTMLHeadTokenizer(java.io.InputStream in) {
- this(new java.io.InputStreamReader(in));
- }
-
- /**
- * Unpacks the compressed character translation table.
- *
- * @param packed the packed character translation table
- * @return the unpacked character translation table
- */
- private static char[] yy_unpack_cmap(String packed) {
- char[] map = new char[0x10000];
- int i = 0; /* index in packed string */
- int j = 0; /* index in unpacked array */
- while (i < 174) {
- int count = packed.charAt(i++);
- char value = packed.charAt(i++);
- do
- map[j++] = value;
- while (--count > 0);
- }
- return map;
- }
-
-
- /**
- * Gets the next input character.
- *
- * @return the next character of the input stream, EOF if the
- * end of the stream is reached.
- * @exception IOException if any I/O-Error occurs
- */
- private int yy_advance() throws java.io.IOException {
-
- /* standard case */
- if (yy_currentPos < yy_endRead)
- return yy_buffer[yy_currentPos++];
-
- /* if the eof is reached, we don't need to work hard */
- if (yy_atEOF)
- return YYEOF;
-
- /* otherwise: need to refill the buffer */
-
- /* first: make room (if you can) */
- if (yy_startRead > 0) {
- System.arraycopy(yy_buffer, yy_startRead, yy_buffer, 0, yy_endRead - yy_startRead);
-
- /* translate stored positions */
- yy_endRead -= yy_startRead;
- yy_currentPos -= yy_startRead;
- yy_markedPos -= yy_startRead;
- yy_pushbackPos -= yy_startRead;
- yy_startRead = 0;
- }
-
- /* is the buffer big enough? */
- if (yy_currentPos >= yy_buffer.length) {
- /* if not: blow it up */
- char newBuffer[] = new char[yy_currentPos * 2];
- System.arraycopy(yy_buffer, 0, newBuffer, 0, yy_buffer.length);
- yy_buffer = newBuffer;
- }
-
- /* finally: fill the buffer with new input */
- int numRead = yy_reader.read(yy_buffer, yy_endRead, yy_buffer.length - yy_endRead);
-
- if (numRead == -1)
- return YYEOF;
-
- yy_endRead += numRead;
-
- return yy_buffer[yy_currentPos++];
- }
-
-
- /**
- * Closes the input stream.
- */
- final public void yyclose() throws java.io.IOException {
- yy_atEOF = true; /* indicate end of file */
- yy_endRead = yy_startRead; /* invalidate buffer */
- yy_reader.close();
- }
-
-
- /**
- * Returns the current lexical state.
- */
- final public int yystate() {
- return yy_lexical_state;
- }
-
- /**
- * Enters a new lexical state
- *
- * @param newState the new lexical state
- */
- final public void yybegin(int newState) {
- yy_lexical_state = newState;
- }
-
-
- /**
- * Returns the text matched by the current regular expression.
- */
- final public String yytext() {
- return new String(yy_buffer, yy_startRead, yy_markedPos - yy_startRead);
- }
-
- /**
- * Returns the length of the matched text region.
- */
- final public int yylength() {
- return yy_markedPos - yy_startRead;
- }
-
-
- /**
- * Reports an error that occured while scanning.
- *
- * @param errorCode the code of the errormessage to display
- */
- private void yy_ScanError(int errorCode) {
- try {
- System.out.println(YY_ERROR_MSG[errorCode]);
- }
- catch (ArrayIndexOutOfBoundsException e) {
- System.out.println(YY_ERROR_MSG[YY_UNKNOWN_ERROR]);
- }
-
- System.exit(1);
- }
-
-
- /**
- * Pushes the specified amount of characters back into the input stream.
- *
- * They will be read again by then next call of the scanning method
- *
- * @param number the number of characters to be read again.
- * This number must not be greater than yylength()!
- */
- private void yypushback(int number) {
- if (number > yylength())
- yy_ScanError(YY_PUSHBACK_2BIG);
-
- yy_markedPos -= number;
- }
-
-
- /**
- * Contains user EOF-code, which will be executed exactly once,
- * when the end of file is reached
- */
- private void yy_do_eof() {
- if (!yy_eof_done) {
- yy_eof_done = true;
- hasMore = false;
-
- }
- }
-
-
- /**
- * Resumes scanning until the next regular expression is matched,
- * the end of input is encountered or an I/O-Error occurs.
- *
- * @return the next token
- * @exception IOException if any I/O-Error occurs
- */
- public String primGetNextToken() throws java.io.IOException {
- int yy_input;
- int yy_action;
-
-
- while (true) {
-
- yychar += yylength();
-
- yy_atBOL = yy_markedPos <= 0 || yy_buffer[yy_markedPos - 1] == '\n';
- if (!yy_atBOL && yy_buffer[yy_markedPos - 1] == '\r') {
- yy_atBOL = yy_advance() != '\n';
- if (!yy_atEOF)
- yy_currentPos--;
- }
-
- yy_action = -1;
-
- yy_currentPos = yy_startRead = yy_markedPos;
-
- if (yy_atBOL)
- yy_state = YY_LEXSTATE[yy_lexical_state + 1];
- else
- yy_state = YY_LEXSTATE[yy_lexical_state];
-
-
- yy_forAction : {
- while (true) {
-
- yy_input = yy_advance();
-
- if (yy_input == YYEOF)
- break yy_forAction;
-
- yy_input = yycmap[yy_input];
-
- boolean yy_isFinal = false;
- boolean yy_noLookAhead = false;
-
- yy_forNext : {
- switch (yy_state) {
- case 0 :
- switch (yy_input) {
- case 1 :
- yy_isFinal = true;
- yy_state = 9;
- break yy_forNext;
- case 2 :
- yy_isFinal = true;
- yy_state = 10;
- break yy_forNext;
- case 3 :
- yy_isFinal = true;
- yy_state = 11;
- break yy_forNext;
- case 10 :
- yy_isFinal = true;
- yy_state = 12;
- break yy_forNext;
- default :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 8;
- break yy_forNext;
- }
-
- case 1 :
- switch (yy_input) {
- case 1 :
- yy_isFinal = true;
- yy_state = 9;
- break yy_forNext;
- case 2 :
- yy_isFinal = true;
- yy_state = 10;
- break yy_forNext;
- case 3 :
- yy_isFinal = true;
- yy_state = 11;
- break yy_forNext;
- case 6 :
- case 7 :
- case 9 :
- case 18 :
- yy_isFinal = true;
- yy_state = 13;
- break yy_forNext;
- case 10 :
- yy_isFinal = true;
- yy_state = 14;
- break yy_forNext;
- default :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 8;
- break yy_forNext;
- }
-
- case 2 :
- switch (yy_input) {
- case 11 :
- yy_isFinal = true;
- yy_state = 15;
- break yy_forNext;
- case 15 :
- yy_isFinal = true;
- yy_state = 16;
- break yy_forNext;
- default :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 8;
- break yy_forNext;
- }
-
- case 3 :
- switch (yy_input) {
- case 25 :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 17;
- break yy_forNext;
- case 26 :
- yy_isFinal = true;
- yy_state = 18;
- break yy_forNext;
- case 34 :
- yy_isFinal = true;
- yy_state = 19;
- break yy_forNext;
- default :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 8;
- break yy_forNext;
- }
-
- case 4 :
- switch (yy_input) {
- case 6 :
- case 9 :
- case 18 :
- yy_isFinal = true;
- yy_state = 21;
- break yy_forNext;
- case 7 :
- yy_isFinal = true;
- yy_state = 22;
- break yy_forNext;
- case 32 :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 23;
- break yy_forNext;
- case 35 :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 24;
- break yy_forNext;
- default :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 20;
- break yy_forNext;
- }
-
- case 5 :
- switch (yy_input) {
- case 7 :
- case 9 :
- case 10 :
- case 25 :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 26;
- break yy_forNext;
- case 11 :
- case 34 :
- yy_isFinal = true;
- yy_state = 27;
- break yy_forNext;
- case 32 :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 28;
- break yy_forNext;
- default :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 25;
- break yy_forNext;
- }
-
- case 6 :
- switch (yy_input) {
- case 7 :
- case 9 :
- case 10 :
- case 25 :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 26;
- break yy_forNext;
- case 34 :
- yy_isFinal = true;
- yy_state = 27;
- break yy_forNext;
- case 35 :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 28;
- break yy_forNext;
- case 36 :
- yy_isFinal = true;
- yy_state = 29;
- break yy_forNext;
- default :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 25;
- break yy_forNext;
- }
-
- case 7 :
- switch (yy_input) {
- case 10 :
- case 25 :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 26;
- break yy_forNext;
- case 34 :
- yy_isFinal = true;
- yy_state = 27;
- break yy_forNext;
- case 11 :
- yy_isFinal = true;
- yy_state = 29;
- break yy_forNext;
- case 6 :
- case 7 :
- case 9 :
- case 18 :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 30;
- break yy_forNext;
- case 32 :
- case 35 :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 31;
- break yy_forNext;
- case 37 :
- yy_isFinal = true;
- yy_state = 32;
- break yy_forNext;
- default :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 25;
- break yy_forNext;
- }
-
- case 9 :
- switch (yy_input) {
- case 2 :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 33;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 10 :
- switch (yy_input) {
- case 1 :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 34;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 11 :
- switch (yy_input) {
- case 4 :
- yy_state = 35;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 12 :
- switch (yy_input) {
- case 13 :
- yy_state = 36;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 13 :
- switch (yy_input) {
- case 6 :
- case 7 :
- case 9 :
- case 18 :
- yy_state = 37;
- break yy_forNext;
- case 10 :
- yy_state = 38;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 14 :
- switch (yy_input) {
- case 13 :
- yy_state = 36;
- break yy_forNext;
- case 11 :
- yy_state = 39;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 15 :
- switch (yy_input) {
- case 25 :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 40;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 16 :
- switch (yy_input) {
- case 19 :
- yy_state = 41;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 18 :
- switch (yy_input) {
- case 16 :
- yy_state = 42;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 19 :
- switch (yy_input) {
- case 25 :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 43;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 21 :
- switch (yy_input) {
- case 6 :
- case 9 :
- case 18 :
- yy_isFinal = true;
- yy_state = 21;
- break yy_forNext;
- case 7 :
- yy_state = 44;
- break yy_forNext;
- default :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 20;
- break yy_forNext;
- }
-
- case 22 :
- switch (yy_input) {
- case 6 :
- case 9 :
- case 18 :
- yy_isFinal = true;
- yy_state = 21;
- break yy_forNext;
- case 7 :
- yy_state = 44;
- break yy_forNext;
- default :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 20;
- break yy_forNext;
- }
-
- case 27 :
- switch (yy_input) {
- case 25 :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 45;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 29 :
- switch (yy_input) {
- case 25 :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 46;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 32 :
- switch (yy_input) {
- case 6 :
- case 7 :
- case 9 :
- case 18 :
- yy_isFinal = true;
- yy_state = 32;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 35 :
- switch (yy_input) {
- case 5 :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 47;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 36 :
- switch (yy_input) {
- case 15 :
- yy_state = 48;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 37 :
- switch (yy_input) {
- case 6 :
- case 7 :
- case 9 :
- case 18 :
- yy_state = 37;
- break yy_forNext;
- case 10 :
- yy_state = 38;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 38 :
- switch (yy_input) {
- case 11 :
- yy_state = 39;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 39 :
- switch (yy_input) {
- case 12 :
- yy_state = 49;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 41 :
- switch (yy_input) {
- case 20 :
- yy_state = 50;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 42 :
- switch (yy_input) {
- case 16 :
- yy_state = 51;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 44 :
- switch (yy_input) {
- case 6 :
- case 9 :
- case 18 :
- yy_isFinal = true;
- yy_state = 21;
- break yy_forNext;
- case 7 :
- yy_state = 44;
- break yy_forNext;
- default :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 20;
- break yy_forNext;
- }
-
- case 48 :
- switch (yy_input) {
- case 16 :
- yy_state = 52;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 49 :
- switch (yy_input) {
- case 13 :
- yy_state = 53;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 50 :
- switch (yy_input) {
- case 21 :
- yy_state = 54;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 51 :
- switch (yy_input) {
- case 27 :
- yy_state = 55;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 52 :
- switch (yy_input) {
- case 17 :
- yy_state = 56;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 53 :
- switch (yy_input) {
- case 14 :
- yy_state = 57;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 54 :
- switch (yy_input) {
- case 22 :
- yy_state = 58;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 55 :
- switch (yy_input) {
- case 28 :
- yy_state = 59;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 56 :
- switch (yy_input) {
- case 18 :
- yy_isFinal = true;
- yy_noLookAhead = true;
- yy_state = 60;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 57 :
- switch (yy_input) {
- case 6 :
- case 7 :
- case 9 :
- case 18 :
- yy_isFinal = true;
- yy_state = 61;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 58 :
- switch (yy_input) {
- case 23 :
- yy_state = 62;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 59 :
- switch (yy_input) {
- case 15 :
- yy_state = 63;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 61 :
- switch (yy_input) {
- case 6 :
- case 7 :
- case 9 :
- case 18 :
- yy_isFinal = true;
- yy_state = 61;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 62 :
- switch (yy_input) {
- case 19 :
- yy_state = 64;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 63 :
- switch (yy_input) {
- case 29 :
- yy_state = 65;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 64 :
- switch (yy_input) {
- case 24 :
- yy_state = 66;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 65 :
- switch (yy_input) {
- case 30 :
- yy_state = 67;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 66 :
- switch (yy_input) {
- case 6 :
- case 7 :
- case 9 :
- case 18 :
- yy_state = 66;
- break yy_forNext;
- case 8 :
- yy_isFinal = true;
- yy_state = 68;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 67 :
- switch (yy_input) {
- case 23 :
- yy_state = 69;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 68 :
- switch (yy_input) {
- case 6 :
- case 7 :
- case 9 :
- case 18 :
- yy_isFinal = true;
- yy_state = 68;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 69 :
- switch (yy_input) {
- case 31 :
- yy_state = 70;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 70 :
- switch (yy_input) {
- case 6 :
- case 7 :
- case 9 :
- case 18 :
- yy_state = 70;
- break yy_forNext;
- case 8 :
- yy_state = 71;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 71 :
- switch (yy_input) {
- case 6 :
- case 7 :
- case 9 :
- case 18 :
- yy_state = 71;
- break yy_forNext;
- case 20 :
- yy_state = 72;
- break yy_forNext;
- case 32 :
- yy_state = 73;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 72 :
- switch (yy_input) {
- case 21 :
- yy_state = 74;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 73 :
- switch (yy_input) {
- case 20 :
- yy_state = 72;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 74 :
- switch (yy_input) {
- case 19 :
- yy_state = 75;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 75 :
- switch (yy_input) {
- case 16 :
- yy_state = 76;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 76 :
- switch (yy_input) {
- case 15 :
- yy_state = 77;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 77 :
- switch (yy_input) {
- case 19 :
- yy_state = 78;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 78 :
- switch (yy_input) {
- case 16 :
- yy_state = 79;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 79 :
- switch (yy_input) {
- case 28 :
- yy_state = 80;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 80 :
- switch (yy_input) {
- case 16 :
- yy_state = 81;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 81 :
- switch (yy_input) {
- case 33 :
- yy_state = 82;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 82 :
- switch (yy_input) {
- case 27 :
- yy_state = 83;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 83 :
- switch (yy_input) {
- case 15 :
- yy_state = 84;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 84 :
- switch (yy_input) {
- case 6 :
- case 7 :
- case 9 :
- case 18 :
- yy_state = 85;
- break yy_forNext;
- case 32 :
- yy_state = 86;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 85 :
- switch (yy_input) {
- case 6 :
- case 7 :
- case 9 :
- case 18 :
- yy_state = 85;
- break yy_forNext;
- case 20 :
- yy_state = 87;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 86 :
- switch (yy_input) {
- case 6 :
- case 7 :
- case 9 :
- case 18 :
- yy_state = 85;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 87 :
- switch (yy_input) {
- case 21 :
- yy_state = 88;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 88 :
- switch (yy_input) {
- case 19 :
- yy_state = 89;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 89 :
- switch (yy_input) {
- case 16 :
- yy_state = 90;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 90 :
- switch (yy_input) {
- case 15 :
- yy_state = 91;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 91 :
- switch (yy_input) {
- case 19 :
- yy_state = 92;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 92 :
- switch (yy_input) {
- case 16 :
- yy_state = 93;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 93 :
- switch (yy_input) {
- case 6 :
- case 7 :
- case 9 :
- case 18 :
- yy_state = 93;
- break yy_forNext;
- case 8 :
- yy_isFinal = true;
- yy_state = 94;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- case 94 :
- switch (yy_input) {
- case 6 :
- case 7 :
- case 9 :
- case 18 :
- yy_isFinal = true;
- yy_state = 94;
- break yy_forNext;
- default :
- break yy_forAction;
- }
-
- default :
- yy_ScanError(YY_ILLEGAL_STATE);
- break;
- }
- }
-
- if (yy_isFinal) {
- yy_action = yy_state;
- yy_markedPos = yy_currentPos;
- if (yy_noLookAhead)
- break yy_forAction;
- }
-
- }
- }
-
-
- switch (yy_action) {
-
- case 26 :
- {
- yypushback(1);
- popState();
- valueText = string.toString();
- return EncodingParserConstants.InvalidTerminatedStringValue;
- }
- case 96 :
- break;
- case 20 :
- case 21 :
- {
- yypushback(1);
- yybegin(UnDelimitedString);
- string.setLength(0);
- }
- case 97 :
- break;
- case 17 :
- {
- yybegin(YYINITIAL);
- if (foundContentTypeValue)
- hasMore = false;
- return HTMLHeadTokenizerConstants.MetaTagEnd;
- }
- case 98 :
- break;
- case 31 :
- {
- yypushback(1);
- popState();
- valueText = string.toString();
- return EncodingParserConstants.InvalidTermintatedUnDelimitedStringValue;
- }
- case 99 :
- break;
- case 43 :
- {
- yybegin(YYINITIAL);
- if (foundContentTypeValue)
- hasMore = false;
- return HTMLHeadTokenizerConstants.MetaTagEnd;
- }
- case 100 :
- break;
- case 45 :
- {
- yypushback(2);
- popState();
- valueText = string.toString();
- return EncodingParserConstants.InvalidTerminatedStringValue;
- }
- case 101 :
- break;
- case 46 :
- {
- yypushback(2);
- popState();
- valueText = string.toString();
- return EncodingParserConstants.InvalidTerminatedStringValue;
- }
- case 102 :
- break;
- case 61 :
- {
- if (yychar == 0) {
- yybegin(ST_XMLDecl);
- return XMLHeadTokenizerConstants.XMLDeclStart;
- }
- }
- case 103 :
- break;
- case 8 :
- case 9 :
- case 10 :
- case 11 :
- case 12 :
- case 13 :
- case 14 :
- case 15 :
- case 16 :
- case 18 :
- case 19 :
- case 22 :
- {
- if (yychar > MAX_TO_SCAN) {
- hasMore = false;
- return EncodingParserConstants.MAX_CHARS_REACHED;
- }
- }
- case 104 :
- break;
- case 60 :
- {
- yybegin(ST_META_TAG);
- return HTMLHeadTokenizerConstants.MetaTagStart;
- }
- case 105 :
- break;
- case 40 :
- {
- yybegin(YYINITIAL);
- return XMLHeadTokenizerConstants.XMLDeclEnd;
- }
- case 106 :
- break;
- case 94 :
- {
- pushCurrentState();
- yybegin(QuotedAttributeValue);
- foundContentTypeValue = true;
- return HTMLHeadTokenizerConstants.MetaTagContentType;
- }
- case 107 :
- break;
- case 68 :
- {
- pushCurrentState();
- yybegin(QuotedAttributeValue);
- return XMLHeadTokenizerConstants.XMLDelEncoding;
- }
- case 108 :
- break;
- case 33 :
- {
- hasMore = false;
- return EncodingParserConstants.UTF16BE;
- }
- case 109 :
- break;
- case 34 :
- {
- hasMore = false;
- return EncodingParserConstants.UTF16LE;
- }
- case 110 :
- break;
- case 47 :
- {
- hasMore = false;
- return EncodingParserConstants.UTF83ByteBOM;
- }
- case 111 :
- break;
- case 28 :
- {
- popState();
- valueText = string.toString();
- return EncodingParserConstants.StringValue;
- }
- case 112 :
- break;
- case 25 :
- case 27 :
- case 29 :
- case 32 :
- {
- string.append(yytext());
- }
- case 113 :
- break;
- case 24 :
- {
- yybegin(SQ_STRING);
- string.setLength(0);
- }
- case 114 :
- break;
- case 23 :
- {
- yybegin(DQ_STRING);
- string.setLength(0);
- }
- case 115 :
- break;
- case 30 :
- {
- yypushback(1);
- popState();
- valueText = string.toString();
- return EncodingParserConstants.UnDelimitedStringValue;
- }
- case 116 :
- break;
- default :
- if (yy_input == YYEOF && yy_startRead == yy_currentPos) {
- yy_atEOF = true;
- yy_do_eof();
- {
- hasMore = false;
- return EncodingParserConstants.EOF;
- }
- }
- else {
- yy_ScanError(YY_NO_MATCH);
- }
- }
- }
- }
-
- /**
- * Runs the scanner on input files.
- *
- * This main method is the debugging routine for the scanner.
- * It prints each returned token to System.out until the end of
- * file is reached, or an error occured.
- *
- * @param argv the command line, contains the filenames to run
- * the scanner on.
- */
- public static void main(String argv[]) {
- for (int i = 0; i < argv.length; i++) {
- HTMLHeadTokenizer scanner = null;
- try {
- scanner = new HTMLHeadTokenizer(new java.io.FileReader(argv[i]));
- }
- catch (java.io.FileNotFoundException e) {
- System.out.println("File not found : \"" + argv[i] + "\"");
- System.exit(1);
- }
- catch (ArrayIndexOutOfBoundsException e) {
- System.out.println("Usage : java HTMLHeadTokenizer <inputfile>");
- System.exit(1);
- }
-
- try {
- do {
- System.out.println(scanner.primGetNextToken());
- }
- while (!scanner.yy_atEOF);
-
- }
- catch (java.io.IOException e) {
- System.out.println("An I/O error occured while scanning :");
- System.out.println(e);
- System.exit(1);
- }
- catch (Exception e) {
- e.printStackTrace();
- System.exit(1);
- }
- }
- }
-
-
-} \ No newline at end of file
diff --git a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HTMLHeadTokenizerConstants.java b/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HTMLHeadTokenizerConstants.java
deleted file mode 100644
index 938bcde421..0000000000
--- a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HTMLHeadTokenizerConstants.java
+++ /dev/null
@@ -1,20 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2004 IBM Corporation and others.
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- * IBM Corporation - initial API and implementation
- *******************************************************************************/
-package org.eclipse.wst.html.core.internal.contenttype;
-
-
-public interface HTMLHeadTokenizerConstants {
-
- String MetaTagEnd = "MetaTagEnd"; //$NON-NLS-1$
- String MetaTagStart = "MetaTagStart"; //$NON-NLS-1$
- String MetaTagContentType = "MetaTagContentType"; //$NON-NLS-1$
-
-} \ No newline at end of file
diff --git a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HTMLResourceEncodingDetector.java b/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HTMLResourceEncodingDetector.java
deleted file mode 100644
index bff22197a9..0000000000
--- a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HTMLResourceEncodingDetector.java
+++ /dev/null
@@ -1,187 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2004 IBM Corporation and others.
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- * IBM Corporation - initial API and implementation
- *******************************************************************************/
-package org.eclipse.wst.html.core.internal.contenttype;
-
-import java.io.IOException;
-import java.util.regex.Pattern;
-
-import org.eclipse.wst.sse.core.internal.encoding.CodedIO;
-import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
-import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;
-import org.eclipse.wst.xml.core.internal.contenttype.EncodingParserConstants;
-
-public class HTMLResourceEncodingDetector extends AbstractResourceEncodingDetector implements IResourceCharsetDetector {
-
- private HTMLHeadTokenizer fTokenizer;
-
- /**
- * There is no spec defined encoding for HTML (historically), so null is
- * returned.
- */
- public String getSpecDefaultEncoding() {
- return null;
- }
-
- private boolean canHandleAsUnicodeStream(String tokenType) {
- boolean canHandleAsUnicodeStream = false;
- if (tokenType == EncodingParserConstants.UTF83ByteBOM) {
- canHandleAsUnicodeStream = true;
- String enc = "UTF-8"; //$NON-NLS-1$
- createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
- fEncodingMemento.setUTF83ByteBOMUsed(true);
- }
- else if (tokenType == EncodingParserConstants.UTF16BE) {
- canHandleAsUnicodeStream = true;
- String enc = "UTF-16BE"; //$NON-NLS-1$
- createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
- }
- else if (tokenType == EncodingParserConstants.UTF16LE) {
- canHandleAsUnicodeStream = true;
- String enc = "UTF-16"; //$NON-NLS-1$
- createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
- }
- return canHandleAsUnicodeStream;
- }
-
- /**
- * @return Returns the tokenizer.
- */
- private HTMLHeadTokenizer getTokenizer() {
- // TODO: need to work on 'reset' in tokenizer, so new instance isn't
- // always needed
- //if (fTokenizer == null) {
- fTokenizer = new HTMLHeadTokenizer();
- // }
- return fTokenizer;
- }
-
- private boolean isLegalString(String valueTokenType) {
- if (valueTokenType == null)
- return false;
- else
- return valueTokenType.equals(EncodingParserConstants.StringValue) || valueTokenType.equals(EncodingParserConstants.UnDelimitedStringValue) || valueTokenType.equals(EncodingParserConstants.InvalidTerminatedStringValue) || valueTokenType.equals(EncodingParserConstants.InvalidTermintatedUnDelimitedStringValue);
- }
-
- protected void parseInput() throws IOException {
- checkInContent();
- if (fEncodingMemento == null) {
- checkHeuristics();
- }
- }
-
- /**
- *
- */
- private void checkHeuristics() throws IOException {
- boolean noHeuristic = false;
- String heuristicEncoding = null;
- try {
- fReader.reset();
- fReader.mark(CodedIO.MAX_MARK_SIZE);
- byte[] bytes = new byte[CodedIO.MAX_MARK_SIZE];
- int nRead = 0;
- for (int i = 0; i < bytes.length; i++) {
- if (fReader.ready()) {
- int oneByte = fReader.read();
- nRead++;
- if (oneByte <= 0xFF) {
- bytes[i] = (byte) oneByte;
- }
- else {
- noHeuristic = true;
- break;
- }
- }
- else {
- break;
- }
- }
- if (!noHeuristic) {
- heuristicEncoding = EncodingGuesser.guessEncoding(bytes, nRead);
- }
- }
- catch (IOException e) {
- // if any IO exception, then not a heuristic case
- }
- finally {
- fReader.reset();
- }
- if (heuristicEncoding != null) {
- createEncodingMemento(heuristicEncoding, EncodingMemento.GUESSED_ENCODING_FROM_STREAM);
- }
-
- }
-
- private void checkInContent() throws IOException {
- HTMLHeadTokenizer tokenizer = getTokenizer();
- tokenizer.reset(fReader);
- HeadParserToken token = null;
- String tokenType = null;
- String contentTypeValue = null;
- do {
- token = tokenizer.getNextToken();
- tokenType = token.getType();
- if (canHandleAsUnicodeStream(tokenType)) {
- // side effect of canHandle is to create appropriate
- // memento
- }
- else if (tokenType == HTMLHeadTokenizerConstants.MetaTagContentType) {
- if (tokenizer.hasMoreTokens()) {
- HeadParserToken valueToken = tokenizer.getNextToken();
- String valueTokenType = valueToken.getType();
- if (isLegalString(valueTokenType)) {
- contentTypeValue = valueToken.getText();
-
- }
- }
- }
-
- }
- while (tokenizer.hasMoreTokens());
- if (contentTypeValue != null) {
- parseContentTypeValue(contentTypeValue);
- }
- }
-
- private void parseContentTypeValue(String contentType) {
- String charset = null;
- Pattern pattern = Pattern.compile(";\\s*charset\\s*=\\s*"); //$NON-NLS-1$
- String[] parts = pattern.split(contentType);
- if (parts.length > 0) {
- // if only one item, it can still be charset instead of
- // contentType
- if (parts.length == 1) {
- if (parts[0].length() > 6) {
- String checkForCharset = parts[0].substring(0, 7);
- if (checkForCharset.equalsIgnoreCase("charset")) { //$NON-NLS-1$
- int eqpos = parts[0].indexOf('=');
- eqpos = eqpos + 1;
- if (eqpos < parts[0].length()) {
- charset = parts[0].substring(eqpos);
- charset = charset.trim();
- }
- }
- }
- }
- else {
- //fContentType = parts[0];
- }
- }
- if (parts.length > 1) {
- charset = parts[1];
- }
-
- if (charset != null && charset.length() > 0) {
- createEncodingMemento(charset, EncodingMemento.FOUND_ENCODING_IN_CONTENT);
- }
- }
-
-} \ No newline at end of file
diff --git a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HeadParserToken.java b/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HeadParserToken.java
deleted file mode 100644
index ffd0406733..0000000000
--- a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/HeadParserToken.java
+++ /dev/null
@@ -1,44 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2001, 2004 IBM Corporation and others.
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- * IBM Corporation - initial API and implementation
- * Jens Lukowski/Innoopract - initial renaming/restructuring
- *
- *******************************************************************************/
-package org.eclipse.wst.html.core.internal.contenttype;
-
-public class HeadParserToken {
- private int fStart;
-
- private String fText;
- private String fType;
-
- protected HeadParserToken() {
- super();
- }
-
- public HeadParserToken(String type, int start, String text) {
- this();
- fType = type;
- fStart = start;
- fText = text;
-
- }
-
- public String getText() {
- return fText;
- }
-
- public String getType() {
- return fType;
- }
-
- public String toString() {
- return ("text: " + fText + " offset: " + fStart + " type: " + fType); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
- }
-}
diff --git a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/IntStack.java b/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/IntStack.java
deleted file mode 100644
index 5006745bf3..0000000000
--- a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/IntStack.java
+++ /dev/null
@@ -1,99 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2001, 2004 IBM Corporation and others.
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- * IBM Corporation - initial API and implementation
- * Jens Lukowski/Innoopract - initial renaming/restructuring
- *
- *******************************************************************************/
-package org.eclipse.wst.html.core.internal.contenttype;
-
-/*
- *
- * A non-resizable class implementing the behavior of java.util.Stack, but
- * directly for the <code> integer </code> primitive.
- */
-import java.util.EmptyStackException;
-
-public class IntStack {
- private int[] list = null;
-
- private int size = 0;
-
- public IntStack() {
- this(100);
- }
-
- public IntStack(int maxdepth) {
- super();
- list = new int[maxdepth];
- initialize();
- }
-
- public void clear() {
- initialize();
- }
-
- public boolean empty() {
- return size == 0;
- }
-
- public int get(int slot) {
- return list[slot];
- }
-
- private void initialize() {
- for (int i = 0; i < list.length; i++)
- list[i] = -1;
- }
-
- /**
- * Returns the int at the top of the stack without removing it
- *
- * @return int at the top of this stack.
- * @exception EmptyStackException
- * when empty.
- */
- public int peek() {
- if (size == 0)
- throw new EmptyStackException();
- return list[size - 1];
- }
-
- /**
- * Removes and returns the int at the top of the stack
- *
- * @return int at the top of this stack.
- * @exception EmptyStackException
- * when empty.
- */
- public int pop() {
- int value = peek();
- list[size - 1] = -1;
- size--;
- return value;
- }
-
- /**
- * Pushes an item onto the top of this stack.
- *
- * @param newValue -
- * the int to be pushed onto this stack.
- * @return the <code>newValue</code> argument.
- */
- public int push(int newValue) {
- if (size == list.length) {
- throw new StackOverflowError();
- }
- list[size++] = newValue;
- return newValue;
- }
-
- public int size() {
- return list.length;
- }
-}
diff --git a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/NullMemento.java b/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/NullMemento.java
deleted file mode 100644
index 5e22af0a61..0000000000
--- a/bundles/org.eclipse.wst.html.core/src/org/eclipse/wst/html/core/internal/contenttype/NullMemento.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*******************************************************************************
- * Copyright (c) 2001, 2004 IBM Corporation and others.
- * All rights reserved. This program and the accompanying materials
- * are made available under the terms of the Eclipse Public License v1.0
- * which accompanies this distribution, and is available at
- * http://www.eclipse.org/legal/epl-v10.html
- *
- * Contributors:
- * IBM Corporation - initial API and implementation
- * Jens Lukowski/Innoopract - initial renaming/restructuring
- *
- *******************************************************************************/
-package org.eclipse.wst.html.core.internal.contenttype;
-
-import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
-import org.eclipse.wst.sse.core.internal.encoding.NonContentBasedEncodingRules;
-
-
-
-/**
- * This class can be used in place of an EncodingMemento (its super class),
- * when there is not in fact ANY encoding information. For example, when a
- * structuredDocument is created directly from a String
- */
-public class NullMemento extends EncodingMemento {
- /**
- *
- */
- public NullMemento() {
- super();
- String defaultCharset = NonContentBasedEncodingRules.useDefaultNameRules(null);
- setJavaCharsetName(defaultCharset);
- setAppropriateDefault(defaultCharset);
- setDetectedCharsetName(null);
- }
-
-}

Back to the top