diff options
Diffstat (limited to 'bundles/org.eclipse.jst.jsp.core/src/org/eclipse/jst/jsp/core/internal/contenttype/JSPResourceEncodingDetector.java')
-rw-r--r-- | bundles/org.eclipse.jst.jsp.core/src/org/eclipse/jst/jsp/core/internal/contenttype/JSPResourceEncodingDetector.java | 521 |
1 files changed, 0 insertions, 521 deletions
diff --git a/bundles/org.eclipse.jst.jsp.core/src/org/eclipse/jst/jsp/core/internal/contenttype/JSPResourceEncodingDetector.java b/bundles/org.eclipse.jst.jsp.core/src/org/eclipse/jst/jsp/core/internal/contenttype/JSPResourceEncodingDetector.java deleted file mode 100644 index 5323479364..0000000000 --- a/bundles/org.eclipse.jst.jsp.core/src/org/eclipse/jst/jsp/core/internal/contenttype/JSPResourceEncodingDetector.java +++ /dev/null @@ -1,521 +0,0 @@ -/******************************************************************************* - * Copyright (c) 2004, 2006 IBM Corporation and others. - * All rights reserved. This program and the accompanying materials - * are made available under the terms of the Eclipse Public License v1.0 - * which accompanies this distribution, and is available at - * http://www.eclipse.org/legal/epl-v10.html - * - * Contributors: - * IBM Corporation - initial API and implementation - *******************************************************************************/ -package org.eclipse.jst.jsp.core.internal.contenttype; - -import java.io.BufferedInputStream; -import java.io.BufferedReader; -import java.io.IOException; -import java.io.InputStream; -import java.io.Reader; -import java.nio.charset.Charset; -import java.nio.charset.IllegalCharsetNameException; -import java.nio.charset.UnsupportedCharsetException; -import java.util.regex.Pattern; - -import org.eclipse.core.resources.IStorage; -import org.eclipse.core.runtime.CoreException; -import org.eclipse.wst.sse.core.internal.encoding.CodedIO; -import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento; -import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector; -import org.eclipse.wst.sse.core.internal.encoding.NonContentBasedEncodingRules; -import org.eclipse.wst.xml.core.internal.contenttype.EncodingParserConstants; -import org.eclipse.wst.xml.core.internal.contenttype.XMLHeadTokenizerConstants; - -public class JSPResourceEncodingDetector implements IResourceCharsetDetector { - - private String fCharset; - - private String fContentType; - - private String fContentTypeValue; - - private String fLanguage; - - private String fPageEncodingValue; - - private JSPHeadTokenizer fTokenizer; - - private String fXMLDecEncodingName; - - private boolean unicodeCase; - - private EncodingMemento fEncodingMemento; - - private boolean fHeaderParsed; - - private Reader fReader; - - private boolean fXHTML; - - private boolean fWML; - - - /** - * No Arg constructor. - */ - public JSPResourceEncodingDetector() { - super(); - } - - class NullMemento extends EncodingMemento { - /** - * - */ - public NullMemento() { - super(); - String defaultCharset = NonContentBasedEncodingRules.useDefaultNameRules(null); - setJavaCharsetName(defaultCharset); - setAppropriateDefault(defaultCharset); - setDetectedCharsetName(null); - } - - } - - /** - * @return Returns the contentType. - */ - public String getContentType() throws IOException { - ensureInputSet(); - if (!fHeaderParsed) { - parseInput(); - // we keep track of if header's already been parse, so can make - // multiple 'get' calls, without causing reparsing. - fHeaderParsed = true; - // Note: there is a "hidden assumption" here that an empty - // string in content should be treated same as not present. - } - return fContentType; - } - - public String getEncoding() throws IOException { - return getEncodingMemento().getDetectedCharsetName(); - } - - // to ensure consist overall rules used, we'll mark as - // final, - // and require subclasses to provide certain pieces of - // the - // implementation - public EncodingMemento getEncodingMemento() throws IOException { - ensureInputSet(); - if (!fHeaderParsed) { - parseInput(); - // we keep track of if header's already been - // parse, so can make - // multiple 'get' calls, without causing - // reparsing. - fHeaderParsed = true; - // Note: there is a "hidden assumption" here - // that an empty - // string in content should be treated same as - // not present. - } - if (fEncodingMemento == null) { - handleSpecDefault(); - } - if (fEncodingMemento == null) { - // safty net - fEncodingMemento = new NullMemento(); - } - return fEncodingMemento; - } - - public String getLanguage() throws IOException { - ensureInputSet(); - if (!fHeaderParsed) { - parseInput(); - fHeaderParsed = true; - } - return fLanguage; - } - - public String getSpecDefaultEncoding() { - // by JSP Spec - final String enc = "ISO-8859-1"; //$NON-NLS-1$ - return enc; - } - - public EncodingMemento getSpecDefaultEncodingMemento() { - resetAll(); - EncodingMemento result = null; - String enc = getSpecDefaultEncoding(); - if (enc != null) { - createEncodingMemento(enc, EncodingMemento.DEFAULTS_ASSUMED_FOR_EMPTY_INPUT); - fEncodingMemento.setAppropriateDefault(enc); - result = fEncodingMemento; - } - return result; - } - - /** - * - */ - public void set(InputStream inputStream) { - resetAll(); - fReader = new ByteReader(inputStream); - try { - fReader.mark(CodedIO.MAX_MARK_SIZE); - } - catch (IOException e) { - // impossible, since we know ByteReader - // supports marking - throw new Error(e); - } - } - - /** - * - */ - public void set(IStorage iStorage) throws CoreException { - resetAll(); - InputStream inputStream = iStorage.getContents(); - InputStream resettableStream = new BufferedInputStream(inputStream, CodedIO.MAX_BUF_SIZE); - resettableStream.mark(CodedIO.MAX_MARK_SIZE); - set(resettableStream); - // TODO we'll need to "remember" IFile, or - // get its (or its project's) settings, in case - // those are needed to handle cases when the - // encoding is not in the file stream. - } - - /** - * Note: this is not part of interface to help avoid confusion ... it - * expected this Reader is a well formed character reader ... that is, its - * all ready been determined to not be a unicode marked input stream. And, - * its assumed to be in the correct position, at position zero, ready to - * read first character. - */ - public void set(Reader reader) { - resetAll(); - fReader = reader; - if (!fReader.markSupported()) { - fReader = new BufferedReader(fReader); - } - try { - fReader.mark(CodedIO.MAX_MARK_SIZE); - } - catch (IOException e) { - // impossble, since we just checked if markable - throw new Error(e); - } - } - - private boolean canHandleAsUnicodeStream(String tokenType) { - boolean canHandleAsUnicode = false; - if (tokenType == EncodingParserConstants.UTF83ByteBOM) { - canHandleAsUnicode = true; - String enc = "UTF-8"; //$NON-NLS-1$ - createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES); - fEncodingMemento.setUTF83ByteBOMUsed(true); - } - else if (tokenType == EncodingParserConstants.UTF16BE) { - canHandleAsUnicode = true; - String enc = "UTF-16BE"; //$NON-NLS-1$ - createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES); - } - else if (tokenType == EncodingParserConstants.UTF16LE) { - canHandleAsUnicode = true; - String enc = "UTF-16"; //$NON-NLS-1$ - createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES); - } - return canHandleAsUnicode; - } - - /** - * Note: once this instance is created, trace info still needs to be - * appended by caller, depending on the context its created. - */ - private void createEncodingMemento(String detectedCharsetName) { - fEncodingMemento = new EncodingMemento(); - fEncodingMemento.setJavaCharsetName(getAppropriateJavaCharset(detectedCharsetName)); - fEncodingMemento.setDetectedCharsetName(detectedCharsetName); - // TODO: if detectedCharset and spec default is - // null, need to use "work - // bench based" defaults. - fEncodingMemento.setAppropriateDefault(getSpecDefaultEncoding()); - } - - /** - * There can sometimes be mulitple 'encodings' specified in a file. This - * is an attempt to centralize the rules for deciding between them. - * Returns encoding according to priority: 1. XML Declaration 2. page - * directive pageEncoding name 3. page directive contentType charset name - */ - private String getAppropriateEncoding() { - String result = null; - if (fXMLDecEncodingName != null) - result = fXMLDecEncodingName; - else if (fPageEncodingValue != null) - result = fPageEncodingValue; - else if (fCharset != null) - result = fCharset; - return result; - } - - /** - * This method can return null, if invalid charset name (in which case - * "appropriateDefault" should be used, if a name is really need for some - * "save anyway" cases). - * - * @param detectedCharsetName - * @return - */ - private String getAppropriateJavaCharset(String detectedCharsetName) { - String result = null; - // 1. Check explicit mapping overrides from - // property file -- its here we pick up "rules" for cases - // that are not even in Java - result = CodedIO.checkMappingOverrides(detectedCharsetName); - // 2. Use the "canonical" name from JRE mappings - // Note: see Charset JavaDoc, the name you get one - // with can be alias, - // the name you get back is "standard" name. - Charset javaCharset = null; - try { - javaCharset = Charset.forName(detectedCharsetName); - } - catch (UnsupportedCharsetException e) { - // only set invalid, if result is same as detected -- they won't - // be equal if - // overridden - if (result != null && result.equals(detectedCharsetName)) { - fEncodingMemento.setInvalidEncoding(detectedCharsetName); - } - } - catch (IllegalCharsetNameException e) { - // only set invalid, if result is same as detected -- they won't - // be equal if - // overridden - if (result != null && result.equals(detectedCharsetName)) { - fEncodingMemento.setInvalidEncoding(detectedCharsetName); - } - } - // give priority to java cononical name, if present - if (javaCharset != null) { - result = javaCharset.name(); - // but still allow overrides - result = CodedIO.checkMappingOverrides(result); - } - return result; - } - - private JSPHeadTokenizer getTokinizer() { - if (fTokenizer == null) { - fTokenizer = new JSPHeadTokenizer(); - } - return fTokenizer; - } - - private void handleSpecDefault() { - String encodingName; - encodingName = getSpecDefaultEncoding(); - if (encodingName != null) { - // createEncodingMemento(encodingName, - // EncodingMemento.USED_CONTENT_TYPE_DEFAULT); - fEncodingMemento = new EncodingMemento(); - fEncodingMemento.setJavaCharsetName(encodingName); - fEncodingMemento.setAppropriateDefault(encodingName); - } - } - - private boolean isLegalString(String valueTokenType) { - boolean result = false; - if (valueTokenType != null) { - result = valueTokenType.equals(EncodingParserConstants.StringValue) || valueTokenType.equals(EncodingParserConstants.UnDelimitedStringValue) || valueTokenType.equals(EncodingParserConstants.InvalidTerminatedStringValue) || valueTokenType.equals(EncodingParserConstants.InvalidTermintatedUnDelimitedStringValue); - } - return result; - } - - - /** - * This method should be exactly the same as what is in - * JSPHeadTokenizerTester - * @param contentType - */ - private void parseContentTypeValue(String contentType) { - Pattern pattern = Pattern.compile(";\\s*charset\\s*=\\s*"); //$NON-NLS-1$ - String[] parts = pattern.split(contentType); - if (parts.length > 0) { - // if only one item, it can still be charset instead of - // contentType - if (parts.length == 1) { - if (parts[0].length() > 6) { - String checkForCharset = parts[0].substring(0, 7); - if (checkForCharset.equalsIgnoreCase("charset")) { //$NON-NLS-1$ - int eqpos = parts[0].indexOf('='); - eqpos = eqpos + 1; - if (eqpos < parts[0].length()) { - fCharset = parts[0].substring(eqpos); - fCharset = fCharset.trim(); - } - } - else { - fContentType = parts[0]; - } - } - } - else { - fContentType = parts[0]; - } - } - if (parts.length > 1) { - fCharset = parts[1]; - } - } - - - /** - * Looks for what ever encoding properties the tokenizer returns. Its the - * responsibility of the tokenizer to stop when appropriate and not go too - * far. - */ - private void parseHeader(JSPHeadTokenizer tokenizer) throws IOException { - fPageEncodingValue = null; - fCharset = null; - - HeadParserToken token = null; - do { - // don't use 'get' here (at least until reset issue fixed) - token = tokenizer.getNextToken(); - String tokenType = token.getType(); - if (canHandleAsUnicodeStream(tokenType)) - unicodeCase = true; - else { - - if (tokenType == XMLHeadTokenizerConstants.XMLDelEncoding) { - if (tokenizer.hasMoreTokens()) { - HeadParserToken valueToken = tokenizer.getNextToken(); - String valueTokenType = valueToken.getType(); - if (isLegalString(valueTokenType)) { - fXMLDecEncodingName = valueToken.getText(); - } - } - } - else if (tokenType == JSPHeadTokenizerConstants.PageEncoding) { - if (tokenizer.hasMoreTokens()) { - HeadParserToken valueToken = tokenizer.getNextToken(); - String valueTokenType = valueToken.getType(); - if (isLegalString(valueTokenType)) { - fPageEncodingValue = valueToken.getText(); - } - } - } - else if (tokenType == JSPHeadTokenizerConstants.PageContentType) { - if (tokenizer.hasMoreTokens()) { - HeadParserToken valueToken = tokenizer.getNextToken(); - String valueTokenType = valueToken.getType(); - if (isLegalString(valueTokenType)) { - fContentTypeValue = valueToken.getText(); - } - } - } - else if (tokenType == JSPHeadTokenizerConstants.PageLanguage) { - if (tokenizer.hasMoreTokens()) { - HeadParserToken valueToken = tokenizer.getNextToken(); - String valueTokenType = valueToken.getType(); - if (isLegalString(valueTokenType)) { - fLanguage = valueToken.getText(); - } - } - } - } - } - while (tokenizer.hasMoreTokens()); - if (fContentTypeValue != null) { - parseContentTypeValue(fContentTypeValue); - } - if (tokenizer.isXHTML()) { - fXHTML = true; - } - if (tokenizer.isWML() ) { - fWML = true; - } - - - } - - private void parseInput() throws IOException { - JSPHeadTokenizer tokenizer = getTokinizer(); - fReader.reset(); - tokenizer.reset(fReader); - parseHeader(tokenizer); - // unicode stream cases are created directly in parseHeader - if (!unicodeCase) { - String enc = getAppropriateEncoding(); - if (enc != null && enc.length() > 0) { - createEncodingMemento(enc, EncodingMemento.FOUND_ENCODING_IN_CONTENT); - } - } - } - - /** - * - */ - private void resetAll() { - fReader = null; - fHeaderParsed = false; - fEncodingMemento = null; - fCharset = null; - fContentTypeValue = null; - fPageEncodingValue = null; - fXMLDecEncodingName = null; - unicodeCase = false; - fXHTML=false; - fWML=false; - } - - - - /** - * convience method all subclasses can use (but not override) - * - * @param detectedCharsetName - * @param reason - */ - private void createEncodingMemento(String detectedCharsetName, String reason) { - createEncodingMemento(detectedCharsetName); - } - - /** - * convience method all subclasses can use (but not override) - */ - private void ensureInputSet() { - if (fReader == null) { - throw new IllegalStateException("input must be set before use"); //$NON-NLS-1$ - } - } - - public boolean isWML() throws IOException { - ensureInputSet(); - if (!fHeaderParsed) { - parseInput(); - // we keep track of if header's already been parse, so can make - // multiple 'get' calls, without causing reparsing. - fHeaderParsed = true; - // Note: there is a "hidden assumption" here that an empty - // string in content should be treated same as not present. - } - return fWML; - } - - public boolean isXHTML() throws IOException { - ensureInputSet(); - if (!fHeaderParsed) { - parseInput(); - // we keep track of if header's already been parse, so can make - // multiple 'get' calls, without causing reparsing. - fHeaderParsed = true; - // Note: there is a "hidden assumption" here that an empty - // string in content should be treated same as not present. - } - return fXHTML; - } -} |