Blame - bundles/org.eclipse.wst.xml.core/src/org/eclipse/wst/xml/core/internal/contenttype/XMLResourceEncodingDetector.java - sourceediting/webtools.sourceediting

blob: 8a21b2e07735c02582266e3edb55f537cca6b4be [file] [log] [blame]

david_williams	9621348	2004-11-11 09:07:12 +0000	[diff] [blame]	1	/*******************************************************************************
				2	* Copyright (c) 2001, 2004 IBM Corporation and others.
				3	* All rights reserved. This program and the accompanying materials
				4	* are made available under the terms of the Eclipse Public License v1.0
				5	* which accompanies this distribution, and is available at
				6	* http://www.eclipse.org/legal/epl-v10.html
				7	*
				8	* Contributors:
				9	* IBM Corporation - initial API and implementation
				10	* Jens Lukowski/Innoopract - initial renaming/restructuring
				11	*
				12	*******************************************************************************/
david_williams	282b8f4	2005-02-14 07:00:56 +0000	[diff] [blame]	13	package org.eclipse.wst.xml.core.internal.contenttype;
david_williams	9621348	2004-11-11 09:07:12 +0000	[diff] [blame]	14
				15	import java.io.IOException;
				16
david_williams	9621348	2004-11-11 09:07:12 +0000	[diff] [blame]	17	import org.eclipse.wst.common.encoding.EncodingMemento;
david_williams	9621348	2004-11-11 09:07:12 +0000	[diff] [blame]	18	import org.eclipse.wst.common.encoding.IResourceCharsetDetector;
david_williams	9621348	2004-11-11 09:07:12 +0000	[diff] [blame]	19
				20
				21	public class XMLResourceEncodingDetector extends AbstractResourceEncodingDetector implements IResourceCharsetDetector {
				22	private XMLHeadTokenizer fTokenizer;
				23
				24	private boolean canHandleAsUnicodeStream(String tokenType) {
				25	boolean canHandleAsUnicodeStream = false;
				26	if (tokenType == EncodingParserConstants.UTF83ByteBOM) {
				27	canHandleAsUnicodeStream = true;
				28	String enc = "UTF-8"; //$NON-NLS-1$
				29	createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
				30	fEncodingMemento.setUTF83ByteBOMUsed(true);
				31	}
				32
				33	else if (tokenType == EncodingParserConstants.UTF16BE) {
				34	canHandleAsUnicodeStream = true;
				35	String enc = "UTF-16BE"; //$NON-NLS-1$
				36	createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
				37	} else if (tokenType == EncodingParserConstants.UTF16LE) {
				38	canHandleAsUnicodeStream = true;
				39	String enc = "UTF-16"; //$NON-NLS-1$
				40	createEncodingMemento(enc, EncodingMemento.DETECTED_STANDARD_UNICODE_BYTES);
				41	}
				42	return canHandleAsUnicodeStream;
				43	}
				44
				45	public String getSpecDefaultEncoding() {
				46	// by default, UTF-8 as per XML spec
				47	final String enc = "UTF-8"; //$NON-NLS-1$
				48	return enc;
				49	}
				50
				51	/**
				52	* @return Returns the tokenizer.
				53	*/
				54	private XMLHeadTokenizer getTokenizer() {
				55	// TODO: need to work on 'reset' in tokenizer, so new instance isn't
				56	// always needed
				57	//if (fTokenizer == null) {
				58	fTokenizer = new XMLHeadTokenizer();
				59	//}
				60	return fTokenizer;
				61	}
				62
				63	private boolean isLegalString(String valueTokenType) {
				64	if (valueTokenType == null)
				65	return false;
				66	else
				67	return valueTokenType.equals(EncodingParserConstants.StringValue) \|\| valueTokenType.equals(EncodingParserConstants.UnDelimitedStringValue) \|\| valueTokenType.equals(EncodingParserConstants.InvalidTerminatedStringValue) \|\| valueTokenType.equals(EncodingParserConstants.InvalidTermintatedUnDelimitedStringValue);
				68	}
				69
				70	protected void parseInput() throws IOException {
				71	XMLHeadTokenizer tokenizer = getTokenizer();
				72	tokenizer.reset(fReader);
				73	HeadParserToken token = null;
				74	String tokenType = null;
				75	do {
				76	token = tokenizer.getNextToken();
				77	tokenType = token.getType();
				78	if (canHandleAsUnicodeStream(tokenType)) {
				79	// side effect of canHandle is to create appropriate memento
				80	} else {
				81	if (tokenType == XMLHeadTokenizerConstants.XMLDelEncoding) {
				82	if (tokenizer.hasMoreTokens()) {
				83	token = tokenizer.getNextToken();
				84	tokenType = token.getType();
				85	if (isLegalString(tokenType)) {
				86	String enc = token.getText();
				87	if (enc != null && enc.length() > 0) {
				88	createEncodingMemento(enc, EncodingMemento.FOUND_ENCODING_IN_CONTENT);
				89	}
				90
				91	}
				92	}
				93	}
				94	}
				95	} while (tokenizer.hasMoreTokens());
				96
				97	}
				98
				99	}