blob: 8a21b2e07735c02582266e3edb55f537cca6b4be [file] [log] [blame]
david_williams96213482004-11-11 09:07:12 +00001/*******************************************************************************
2 * Copyright (c) 2001, 2004 IBM Corporation and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Eclipse Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/epl-v10.html
7 *
8 * Contributors:
9 * IBM Corporation - initial API and implementation
10 * Jens Lukowski/Innoopract - initial renaming/restructuring
11 *
12 *******************************************************************************/
david_williams282b8f42005-02-14 07:00:56 +000013package org.eclipse.wst.xml.core.internal.contenttype;
david_williams96213482004-11-11 09:07:12 +000014
15import java.io.IOException;
16
david_williams96213482004-11-11 09:07:12 +000017import org.eclipse.wst.common.encoding.EncodingMemento;
david_williams96213482004-11-11 09:07:12 +000018import org.eclipse.wst.common.encoding.IResourceCharsetDetector;
david_williams96213482004-11-11 09:07:12 +000019
20
21public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetector implements IResourceCharsetDetector {
22 private XMLHeadTokenizer fTokenizer;
23
24 private boolean canHandleAsUnicodeStream(String tokenType) {
25 boolean canHandleAsUnicodeStream = false;
26 if (tokenType == EncodingParserConstants.UTF83ByteBOM) {
27 canHandleAsUnicodeStream = true;
28 String enc = "UTF-8"; //$NON-NLS-1$
29 createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
30 fEncodingMemento.setUTF83ByteBOMUsed(true);
31 }
32
33 else if (tokenType == EncodingParserConstants.UTF16BE) {
34 canHandleAsUnicodeStream = true;
35 String enc = "UTF-16BE"; //$NON-NLS-1$
36 createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
37 } else if (tokenType == EncodingParserConstants.UTF16LE) {
38 canHandleAsUnicodeStream = true;
39 String enc = "UTF-16"; //$NON-NLS-1$
40 createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
41 }
42 return canHandleAsUnicodeStream;
43 }
44
45 public String getSpecDefaultEncoding() {
46 // by default, UTF-8 as per XML spec
47 final String enc = "UTF-8"; //$NON-NLS-1$
48 return enc;
49 }
50
51 /**
52 * @return Returns the tokenizer.
53 */
54 private XMLHeadTokenizer getTokenizer() {
55 // TODO: need to work on 'reset' in tokenizer, so new instance isn't
56 // always needed
57 //if (fTokenizer == null) {
58 fTokenizer = new XMLHeadTokenizer();
59 //}
60 return fTokenizer;
61 }
62
63 private boolean isLegalString(String valueTokenType) {
64 if (valueTokenType == null)
65 return false;
66 else
67 return valueTokenType.equals(EncodingParserConstants.StringValue) || valueTokenType.equals(EncodingParserConstants.UnDelimitedStringValue) || valueTokenType.equals(EncodingParserConstants.InvalidTerminatedStringValue) || valueTokenType.equals(EncodingParserConstants.InvalidTermintatedUnDelimitedStringValue);
68 }
69
70 protected void parseInput() throws IOException {
71 XMLHeadTokenizer tokenizer = getTokenizer();
72 tokenizer.reset(fReader);
73 HeadParserToken token = null;
74 String tokenType = null;
75 do {
76 token = tokenizer.getNextToken();
77 tokenType = token.getType();
78 if (canHandleAsUnicodeStream(tokenType)) {
79 // side effect of canHandle is to create appropriate memento
80 } else {
81 if (tokenType == XMLHeadTokenizerConstants.XMLDelEncoding) {
82 if (tokenizer.hasMoreTokens()) {
83 token = tokenizer.getNextToken();
84 tokenType = token.getType();
85 if (isLegalString(tokenType)) {
86 String enc = token.getText();
87 if (enc != null && enc.length() > 0) {
88 createEncodingMemento(enc, EncodingMemento.FOUND_ENCODING_IN_CONTENT);
89 }
90
91 }
92 }
93 }
94 }
95 } while (tokenizer.hasMoreTokens());
96
97 }
98
99}