Blame - bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/EncodingMemento.java - sourceediting/webtools.sourceediting

blob: 853dfde27d2221b80c4fb9dbc65c65c7821060cb [file] [log] [blame]

david_williams	dce4ddd	2005-03-18 05:35:37 +0000	[diff] [blame]	1	/*******************************************************************************
				2	* Copyright (c) 2001, 2004 IBM Corporation and others.
				3	* All rights reserved. This program and the accompanying materials
				4	* are made available under the terms of the Eclipse Public License v1.0
				5	* which accompanies this distribution, and is available at
				6	* http://www.eclipse.org/legal/epl-v10.html
				7	*
				8	* Contributors:
				9	* IBM Corporation - initial API and implementation
				10	* Jens Lukowski/Innoopract - initial renaming/restructuring
				11	*
				12	*******************************************************************************/
				13	package org.eclipse.wst.sse.core.internal.encoding;
				14
				15	import org.eclipse.core.runtime.content.IContentDescription;
				16
				17
				18	/**
				19	* This class is to simply hold information and data about the type of
				20	* encoding found for a resource. It not only includes names, etc., but also
				21	* gives hints about the algorithm, or rule, by which the encoding was determined.
				22	* Having all this info in a central object, associated with the Document
				23	* (technically, IStructuredDocument), allows for better user error messages,
				24	* and better handling of knowing how to dump a file, given we know how it was
				25	* loaded.
				26	*
				27	* Note: the data in this class is only valid if it has actually gone through
				28	* the loading or dumping sequence. It is not accurate, for example, if a
				29	* structuredDocument is simply created and then setText called. In this type
				30	* of case, accuracy for loading and dumping is not required, since it's all
				31	* re-discovered. One limitation is that structuredDocuments created "from
				32	* scratch" this way don't have any encoding information to count on, and
				33	* would have to arrange the processing to be done. (And it is done,
				34	* automatically if going through loader or dumper, but perhaps not in future
				35	* new uses. TODO: this can be improved in future versions.)
				36	*
				37	* isInitialized is set when the loader or dumper processes have been used,
				38	* but even this can't be counted on 100% if the document has been modified
				39	* since.
david_williams	7a65dc2	2005-04-09 02:19:50 +0000	[diff] [blame]	40	*
david_williams	dce4ddd	2005-03-18 05:35:37 +0000	[diff] [blame]	41	*/
				42	public class EncodingMemento implements Cloneable {
				43
				44	public final static String CLONED = "cloned"; //$NON-NLS-1$
				45	public final static String DEFAULTS_ASSUMED_FOR_EMPTY_INPUT = "DefaultsAssumedForEmptyInput"; //$NON-NLS-1$
				46	public final static String DEFAULTS_USED_DUE_TO_SMALL_STREAM = "defaultsUsedDueToSmallStream"; //$NON-NLS-1$
				47
				48
				49	/*
				50	* Strings to be used for tracing. TODO: need to clean this up, we no
				51	* longer use all of them
				52	*/
				53	public final static String DETECTED_STANDARD_UNICODE_BYTES = "detectedStandardUnicodeBytes"; //$NON-NLS-1$
				54	public final static String FOUND_ENCODING_IN_CONTENT = "foundEncodingInContent"; //$NON-NLS-1$
				55	public final static String FOUND_ENCODING_IN_STREAM = "foundEncodingInStream"; //$NON-NLS-1$
				56	public final static String FOUND_ENCODING_IN_STRUCTURED_DOCUMENT = "foundEncodingInStructuredDocument"; //$NON-NLS-1$
				57	public final static String GUESSED_ENCODING_FROM_STREAM = "GuessEncodingFromStream"; //$NON-NLS-1$
				58	public final static String JAVA_NAME_FOUND_AS_IANA_NAME = "noMappingFoundButJavaNameFoundToBeIANAName"; //$NON-NLS-1$
				59	public final static String JAVA_NAME_FOUND_IN_ALIAS_NAME = "noMappingFoundButJavaNameFoundInAliasTable"; //$NON-NLS-1$
				60	public final static String NO_IANA_NAME_FOUND = "noMappingFoundFromJavaNameToIANAName"; //$NON-NLS-1$
				61	public final static String USED_CONTENT_TYPE_DEFAULT = "UsedContentTypeDefault"; //$NON-NLS-1$
				62	public final static String USED_JAVA_DEFAULT = "UsedJavaDefault"; //$NON-NLS-1$
				63	public final static String USED_MEMENTO_FROM_LOAD = "usedMementoFromLoad"; //$NON-NLS-1$
				64	public final static String USED_PROPERTY_SETTINGS = "USED_PROPERTY_SETTINGS"; //$NON-NLS-1$
				65	public final static String USED_USER_SPECIFIED_PREFERENCE = "UsedUserSpecifiedPreference"; //$NON-NLS-1$
				66	public final static String USED_WORKSPACE_DEFAULT = "UsedWorkspaceDefault"; //$NON-NLS-1$
				67	public final static String USER_IS_USING_JAVA_ENCODING = "UserIsUsingJavaEncoding"; //$NON-NLS-1$
				68	private String fAppropriateDefault;
				69	private String fDetectedCharsetName;
david_williams	dce4ddd	2005-03-18 05:35:37 +0000	[diff] [blame]	70	private String fInvalidEncoding;
david_williams	dce4ddd	2005-03-18 05:35:37 +0000	[diff] [blame]	71
				72
				73	private String fJavaCharsetName;
				74	private boolean fUnicodeStream;
				75	private boolean fUTF83ByteBOMUsed;
				76
david_williams	126339f	2005-07-05 05:54:08 +0000	[diff] [blame]	77	public EncodingMemento() {
				78	super();
				79	}
				80
david_williams	dce4ddd	2005-03-18 05:35:37 +0000	[diff] [blame]	81	/**
david_williams	7a65dc2	2005-04-09 02:19:50 +0000	[diff] [blame]	82	* Returns a clone of this object.
david_williams	dce4ddd	2005-03-18 05:35:37 +0000	[diff] [blame]	83	*/
				84	public Object clone() {
				85	EncodingMemento object = null;
				86	try {
				87	object = (EncodingMemento) super.clone();
david_williams	7a65dc2	2005-04-09 02:19:50 +0000	[diff] [blame]	88	}
				89	catch (CloneNotSupportedException e) {
david_williams	dce4ddd	2005-03-18 05:35:37 +0000	[diff] [blame]	90	// impossible, since we're implementing here
				91	}
				92
				93	return object;
				94
				95	}
				96
				97	/**
				98	* Returns the appropriateDefault. This is only set if an invalid encoding
				99	* was found, and contains an charset appropriate to use as a default
				100	* value, if, for example, the user decides to load the document anyway,
				101	* even though the charset was found to be invalid.
				102	*
				103	* @return String
				104	*/
				105	public String getAppropriateDefault() {
				106	if (fAppropriateDefault == null) {
				107	fAppropriateDefault = NonContentBasedEncodingRules.useDefaultNameRules(null);
				108	}
				109	return fAppropriateDefault;
				110	}
				111
				112	/**
				113	* Returns the charset name, if it is different from the charset name
				114	* found in getJavaCharsetName. This can happen, for example, if there are
				115	* differences in case. This method might return SHIFT_JIS, and the the
				116	* getJavaCharsetName might return Shift_JIS -- if SHIFT_JIS was detected
				117	* in file/document. If the original file contained the correct case, then
				118	* this method would return null. The getJavaCharsetName is typically the
				119	* one that should always be used, and this one only used for certain
				120	* error conditions, or or if when creating a "duplicate" resource, it was
				121	* desired to use exactly the charset name as in the original document. As
				122	* an example of this later case, the original document might contain
				123	* ISO-8859-9, but the detected charset name might contain ISO-8859-9-I.
				124	*
				125	* @return String
				126	*/
				127	public String getDetectedCharsetName() {
				128	return fDetectedCharsetName;
				129	}
				130
				131	/**
david_williams	dce4ddd	2005-03-18 05:35:37 +0000	[diff] [blame]	132	* Returns a charset name that was detected, but not found to be a charset
				133	* suppoorted by the VM.
				134	*
				135	* @return String
				136	*/
				137	public String getInvalidEncoding() {
				138	return fInvalidEncoding;
				139	}
				140
				141	/**
				142	* Returns the java cononical charset name.
				143	*
				144	* @return String
				145	*/
				146	public String getJavaCharsetName() {
				147	return fJavaCharsetName;
				148	}
				149
				150	/**
				151	* Note: we may be able to remove this method, if it turns out this work
				152	* is done by "text" type.
				153	*
				154	* @deprecated -
				155	*/
				156	public byte[] getUnicodeBOM() {
				157	byte[] bom = null;
				158	if (isUTF83ByteBOMUsed())
				159	bom = IContentDescription.BOM_UTF_8;
				160	else if (isUnicodeStream()) {
				161	if (getJavaCharsetName().equals("UTF-16") \|\| getJavaCharsetName().equals("UTF-16LE")) { //$NON-NLS-1$ //$NON-NLS-2$
				162	bom = IContentDescription.BOM_UTF_16LE;
david_williams	7a65dc2	2005-04-09 02:19:50 +0000	[diff] [blame]	163	}
				164	else if (getJavaCharsetName().equals("UTF-16BE")) { //$NON-NLS-1$
david_williams	dce4ddd	2005-03-18 05:35:37 +0000	[diff] [blame]	165	bom = IContentDescription.BOM_UTF_16BE;
				166	}
				167
				168	}
				169	return bom;
				170	}
				171
				172	/**
david_williams	dce4ddd	2005-03-18 05:35:37 +0000	[diff] [blame]	173	* Note: in our implementation, the stream is a unicode stream if the
				174	* charset is UTF-16, UTF-16LE, or UTF-16BE. A stream with 3 byte BOM is
				175	* not considered unicode stream here.
				176	*
				177	* @return returns true if is a unicode (UTF-16) stream
				178	*/
				179	public boolean isUnicodeStream() {
				180	return fUnicodeStream;
				181	}
				182
				183	/**
				184	* Note: in our implementation, the stream is a unicode stream if the
				185	* charset is UTF-16, UTF-16LE, or UTF-16BE. A stream with 3 byte BOM is
				186	* not considered unicode stream here.
				187	*
				188	* Set during load, can be used by dumper to write 3 byte BOM, which Java
				189	* does not normally do. This helps maintain compatibility with other
				190	* programs (those that wrote the 3 byte BOM there to begin with.
				191	*
				192	* @return boolean
				193	*/
				194	public boolean isUTF83ByteBOMUsed() {
				195	return fUTF83ByteBOMUsed;
				196	}
				197
				198	public boolean isValid() {
				199	return getInvalidEncoding() == null;
				200	}
				201
				202	/**
				203	* Sets the appropriateDefault.
				204	*
				205	* @param appropriateDefault
				206	* The appropriateDefault to set
				207	*/
				208	public void setAppropriateDefault(String appropriateDefault) {
				209	fAppropriateDefault = appropriateDefault;
				210	}
				211
				212
				213	public void setDetectedCharsetName(String detectedCharsetName) {
				214	fDetectedCharsetName = detectedCharsetName;
				215	}
				216
david_williams	dce4ddd	2005-03-18 05:35:37 +0000	[diff] [blame]	217	public void setInvalidEncoding(String invalidEncoding) {
				218	fInvalidEncoding = invalidEncoding;
				219	}
				220
				221	/**
david_williams	dce4ddd	2005-03-18 05:35:37 +0000	[diff] [blame]	222	* Sets the javaEncodingName.
				223	*
				224	* @param javaEncodingName
				225	* The javaEncodingName to set
				226	*/
				227	public void setJavaCharsetName(String javaCharsetName) {
				228	fJavaCharsetName = javaCharsetName;
				229	}
				230
				231	/**
				232	* @param b
				233	*/
				234	public void setUnicodeStream(boolean unicodeStream) {
				235	fUnicodeStream = unicodeStream;
				236
				237	}
				238
				239	/**
				240	* Sets the uTF83ByteBOMfound.
				241	*
				242	* @param uTF83ByteBOMfound
				243	* The uTF83ByteBOMfound to set
				244	*/
				245	public void setUTF83ByteBOMUsed(boolean uTF83ByteBOMUsed) {
				246	fUTF83ByteBOMUsed = uTF83ByteBOMUsed;
				247	}
				248
				249	}