Skip to main content
summaryrefslogtreecommitdiffstats
blob: 07937e3ff24bc67fbcc730ffcf7526be4248465b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
/*******************************************************************************
 * Copyright (c) 2001, 2004 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *     Jens Lukowski/Innoopract - initial renaming/restructuring
 *     
 *******************************************************************************/
package org.eclipse.wst.dtd.core.internal.encoding;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;

import org.eclipse.core.resources.IStorage;
import org.eclipse.core.runtime.CoreException;
import org.eclipse.wst.sse.core.internal.encoding.CodedIO;
import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;


public abstract class AbstractResourceEncodingDetector implements IResourceCharsetDetector {

	protected EncodingMemento fEncodingMemento;

	protected boolean fHeaderParsed;

	protected Reader fReader;

	/**
	 * 
	 */
	public AbstractResourceEncodingDetector() {
		super();
	}

	/**
	 * Note: once this instance is created, trace info still needs to be
	 * appended by caller, depending on the context its created.
	 */
	private void createEncodingMemento(String detectedCharsetName) {
		fEncodingMemento = new EncodingMemento();
		fEncodingMemento.setJavaCharsetName(getAppropriateJavaCharset(detectedCharsetName));
		fEncodingMemento.setDetectedCharsetName(detectedCharsetName);
		// TODO: if detectedCharset and spec default is
		// null, need to use "work
		// bench based" defaults.
		fEncodingMemento.setAppropriateDefault(getSpecDefaultEncoding());
	}

	/**
	 * convience method all subclasses can use (but not override)
	 * 
	 * @param detectedCharsetName
	 * @param reason
	 */
	final protected void createEncodingMemento(String detectedCharsetName, String reason) {
		createEncodingMemento(detectedCharsetName);
	}

	/**
	 * convience method all subclasses can use (but not override)
	 */
	final protected void ensureInputSet() {
		if (fReader == null) {
			throw new IllegalStateException("input must be set before use"); //$NON-NLS-1$
		}
	}

	/**
	 * This method can return null, if invalid charset name (in which case
	 * "appropriateDefault" should be used, if a name is really need for some
	 * "save anyway" cases).
	 * 
	 * @param detectedCharsetName
	 * @return
	 */
	private String getAppropriateJavaCharset(String detectedCharsetName) {
		String result = null;
		// 1. Check explicit mapping overrides from
		// property file -- its here we pick up "rules" for cases
		// that are not even in Java
		result = CodedIO.checkMappingOverrides(detectedCharsetName);
		// 2. Use the "canonical" name from JRE mappings
		// Note: see Charset JavaDoc, the name you get one
		// with can be alias,
		// the name you get back is "standard" name.
		Charset javaCharset = null;
		try {
			javaCharset = Charset.forName(detectedCharsetName);
		}
		catch (UnsupportedCharsetException e) {
			// only set invalid, if result is same as detected -- they won't
			// be equal if
			// overridden
			if (result != null && result.equals(detectedCharsetName)) {
				fEncodingMemento.setInvalidEncoding(detectedCharsetName);
			}
		}
		catch (IllegalCharsetNameException e) {
			// only set invalid, if result is same as detected -- they won't
			// be equal if
			// overridden
			if (result != null && result.equals(detectedCharsetName)) {
				fEncodingMemento.setInvalidEncoding(detectedCharsetName);
			}
		}
		// give priority to java cononical name, if present
		if (javaCharset != null) {
			result = javaCharset.name();
			// but still allow overrides
			result = CodedIO.checkMappingOverrides(result);
		}
		return result;
	}

	public String getEncoding() throws IOException {
		return getEncodingMemento().getDetectedCharsetName();
	}

	// to ensure consist overall rules used, we'll mark as
	// final,
	// and require subclasses to provide certain pieces of
	// the
	// implementation
	public EncodingMemento getEncodingMemento() throws IOException {
		ensureInputSet();
		if (!fHeaderParsed) {
			parseInput();
			// we keep track of if header's already been
			// parse, so can make
			// multiple 'get' calls, without causing
			// reparsing.
			fHeaderParsed = true;
			// Note: there is a "hidden assumption" here
			// that an empty
			// string in content should be treated same as
			// not present.
		}
		if (fEncodingMemento == null) {
			handleSpecDefault();
		}
		if (fEncodingMemento == null) {
			// safty net
			fEncodingMemento = new NullMemento();
		}
		return fEncodingMemento;
	}

	/**
	 * This is to return a default encoding -- as specified by an industry
	 * content type spec -- when not present in the stream, for example, XML
	 * specifies UTF-8, JSP specifies ISO-8859-1. This method should return
	 * null if there is no such "spec default".
	 */
	abstract public String getSpecDefaultEncoding();

	public EncodingMemento getSpecDefaultEncodingMemento() {
		resetAll();
		EncodingMemento result = null;
		String enc = getSpecDefaultEncoding();
		if (enc != null) {
			createEncodingMemento(enc, EncodingMemento.DEFAULTS_ASSUMED_FOR_EMPTY_INPUT);
			fEncodingMemento.setAppropriateDefault(enc);
			result = fEncodingMemento;
		}
		return result;
	}

	private void handleSpecDefault() {
		String encodingName;
		encodingName = getSpecDefaultEncoding();
		if (encodingName != null) {
			// createEncodingMemento(encodingName,
			// EncodingMemento.USED_CONTENT_TYPE_DEFAULT);
			fEncodingMemento = new EncodingMemento();
			fEncodingMemento.setJavaCharsetName(encodingName);
			fEncodingMemento.setAppropriateDefault(encodingName);
		}
	}

	/**
	 * Every subclass must provide a way to parse the input. This method has
	 * several critical responsibilities:
	 * <li>set the fEncodingMemento field appropriately, according to the
	 * results of the parse of fReader.</li>
	 * <li>set fHarderParsed to true, to avoid wasted re-parsing.</li>
	 */
	abstract protected void parseInput() throws IOException;

	/**
	 * 
	 */
	private void resetAll() {
		fReader = null;
		fHeaderParsed = false;
		fEncodingMemento = null;
	}

	/**
	 * 
	 */
	public void set(InputStream inputStream) {
		resetAll();
		fReader = new ByteReader(inputStream);
		try {
			fReader.mark(CodedIO.MAX_MARK_SIZE);
		}
		catch (IOException e) {
			// impossible, since we know ByteReader
			// supports marking
			throw new Error(e);
		}
	}

	/**
	 * 
	 */
	public void set(IStorage iStorage) throws CoreException {
		resetAll();
		InputStream inputStream = iStorage.getContents();
		InputStream resettableStream = new BufferedInputStream(inputStream, CodedIO.MAX_BUF_SIZE);
		resettableStream.mark(CodedIO.MAX_MARK_SIZE);
		set(resettableStream);
		// TODO we'll need to "remember" IFile, or
		// get its (or its project's) settings, in case
		// those are needed to handle cases when the
		// encoding is not in the file stream.
	}

	/**
	 * Note: this is not part of interface to help avoid confusion ... it
	 * expected this Reader is a well formed character reader ... that is, its
	 * all ready been determined to not be a unicode marked input stream. And,
	 * its assumed to be in the correct position, at position zero, ready to
	 * read first character.
	 */
	public void set(Reader reader) {
		resetAll();
		fReader = reader;
		if (!fReader.markSupported()) {
			fReader = new BufferedReader(fReader);
		}
		try {
			fReader.mark(CodedIO.MAX_MARK_SIZE);
		}
		catch (IOException e) {
			// impossble, since we just checked if markable
			throw new Error(e);
		}
	}
}

Back to the top