diff options
Diffstat (limited to 'bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/UnicodeBOMEncodingDetector.java')
-rw-r--r-- | bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/UnicodeBOMEncodingDetector.java | 213 |
1 files changed, 0 insertions, 213 deletions
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/UnicodeBOMEncodingDetector.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/UnicodeBOMEncodingDetector.java deleted file mode 100644 index e3c9d995d4..0000000000 --- a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/UnicodeBOMEncodingDetector.java +++ /dev/null @@ -1,213 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2001, 2005 IBM Corporation and others. - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Public License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/legal/epl-v10.html - * - * Contributors: - * IBM Corporation - initial API and implementation - * Jens Lukowski/Innoopract - initial renaming/restructuring - * - *******************************************************************************/ -package org.eclipse.wst.sse.core.internal.encoding.util; - -import java.io.BufferedInputStream; -import java.io.IOException; -import java.io.InputStream; -import java.io.Reader; -import java.nio.charset.Charset; - -import org.eclipse.core.resources.IStorage; -import org.eclipse.core.runtime.CoreException; -import org.eclipse.wst.sse.core.internal.encoding.CodedIO; -import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento; -import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector; - - -/** - * This is a "common function" class to decide if an input stream, is a - * unicode stream. - */ -public class UnicodeBOMEncodingDetector implements IResourceCharsetDetector { - - //private static final String UTF_16_CHARSET_NAME = "UTF-16"; - // //$NON-NLS-1$ - - public static class NotEnoughInputForBOMException extends IOException { - - /** - * Default <code>serialVersionUID</code> - */ - private static final long serialVersionUID = 1L; - - public NotEnoughInputForBOMException() { - super(); - } - - public NotEnoughInputForBOMException(String s) { - super(s); - } - - } - - private final static byte BB = (byte) 0xBB; - private final static byte BF = (byte) 0xBF; - private final static byte EF = (byte) 0xEF; - private final static byte FE = (byte) -2; - - private final static byte FF = (byte) -1; - private static final String UTF_16BE_CHARSET_NAME = "UTF-16BE"; //$NON-NLS-1$ - private static final String UTF_16LE_CHARSET_NAME = "UTF-16LE"; //$NON-NLS-1$ - - private static final String UTF_8_CHARSET_NAME = "UTF-8"; //$NON-NLS-1$ - - private InputStream fInputStream = null; - private boolean fNoBOMPossible; - - private EncodingMemento checkForBOM(InputStream inputStream) { - EncodingMemento result = null; - - try { - byte b1 = getNextByte(inputStream); - byte b2 = getNextByte(inputStream); - if (b1 == FE && b2 == FF) { - result = createEncodingMemento(UTF_16BE_CHARSET_NAME); - result.setUnicodeStream(true); - } else { - if (b1 == FF && b2 == FE) { - result = createEncodingMemento(UTF_16LE_CHARSET_NAME); - result.setUnicodeStream(true); - } else { - byte b3 = getNextByte((inputStream)); - if (b1 == EF && b2 == BB && b3 == BF) { - result = createEncodingMemento(UTF_8_CHARSET_NAME); - result.setUTF83ByteBOMUsed(true); - } - } - } - } catch (NotEnoughInputForBOMException e) { - // This is sort of unexpected for normal cases, but can occur for - // empty - // streams. And, this can occur "normally" for non-BOM streams - // that - // have only two - // bytes, and for which those two bytes match the first two bytes - // of UTF-8 - // BOM In any case, we'll simply return null; - result = null; - } catch (IOException e) { - // other errors should be impossible - throw new Error(e); - } - - return result; - } - - private EncodingMemento createEncodingMemento(String javaEncodingName) { - EncodingMemento encodingMemento = new EncodingMemento(); - encodingMemento.setJavaCharsetName(javaEncodingName); - String ianaName = Charset.forName(javaEncodingName).name(); - encodingMemento.setDetectedCharsetName(ianaName); - if (javaEncodingName.equals(UTF_8_CHARSET_NAME)) { - encodingMemento.setUTF83ByteBOMUsed(true); - } - return encodingMemento; - } - - public String getEncoding() throws IOException { - - return getEncodingMemento().getDetectedCharsetName(); - } - - /** - * Returns IANA encoding name if BOM detected in stream. If a BOM is - * detected, the stream is left positioned after readying the BOM. If a - * BOM is not detected, the steam is reset. - * - * 0xFEFF UTF-16, big-endian 0xFFFE UTF-16, little-endian 0xEFBBBF UTF-8 - * (BOM is optional) - * - * @param inputStream - - * must be a resetable (mark supported) stream so it can be - * reset, if not BOM encoded stream - * @return String - IANA encodingname (may not work well on 1.3, but 1.4 - * seems to have good support for IANA names) - */ - public EncodingMemento getEncodingMemento() { - - EncodingMemento result = null; - if (!fNoBOMPossible) { - - if (fInputStream == null) - throw new IllegalStateException("input must be set before use"); //$NON-NLS-1$ - - if (!fInputStream.markSupported()) { - throw new IllegalArgumentException("inputStream must be resetable"); //$NON-NLS-1$ - } - - result = checkForBOM(fInputStream); - } - - return result; - - } - - private byte getNextByte(InputStream inputStream) throws IOException { - - int byteCharAsInt = -1; - // be sure we won't block - if (inputStream.available() > 0) { - byteCharAsInt = inputStream.read(); - byteCharAsInt = byteCharAsInt & 0XFF; - } - // to avoid confustion over meaning of returned byte, - // throw exception if EOF reached. - if (byteCharAsInt == -1) - throw new NotEnoughInputForBOMException("typically not an error"); //$NON-NLS-1$ - return (byte) byteCharAsInt; - } - - /** - * - */ - - public String getSpecDefaultEncoding() { - // There is no default for this case - return null; - } - - /** - * - */ - private void resetAll() { - fNoBOMPossible = false; - fInputStream = null; - - } - - /** - * - */ - - public void set(InputStream inputStream) { - resetAll(); - fInputStream = inputStream; - } - - public void set(IStorage iStorage) throws CoreException { - set(new BufferedInputStream(iStorage.getContents(), CodedIO.MAX_BUF_SIZE)); - - } - - public void set(Reader reader) { - if (reader instanceof ByteReader) { - ByteReader byteReader = (ByteReader) reader; - fInputStream = byteReader.fInputStream; - } else { - fNoBOMPossible = true; - } - - } - -} |