Skip to main content

This CGIT instance is deprecated, and repositories have been moved to Gitlab or Github. See the repository descriptions for specific locations.

summaryrefslogtreecommitdiffstats
blob: d03f79788844482a10cf4f66eaed31a333de7582 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
/*******************************************************************************
 * Copyright (c) 2004 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *******************************************************************************/
package org.eclipse.wst.css.core.internal.encoding;

import java.util.Iterator;

import org.eclipse.jface.text.IDocument;
import org.eclipse.wst.css.core.internal.contenttype.CSSResourceEncodingDetector;
import org.eclipse.wst.css.core.internal.parserz.CSSRegionContexts;
import org.eclipse.wst.sse.core.internal.document.DocumentReader;
import org.eclipse.wst.sse.core.internal.document.IDocumentCharsetDetector;
import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocument;
import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocumentRegion;
import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocumentRegionList;
import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegion;
import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegionList;
import org.eclipse.wst.sse.core.utils.StringUtils;


public class CSSDocumentCharsetDetector extends CSSResourceEncodingDetector implements IDocumentCharsetDetector {

	public String getEncodingName(IStructuredDocument structuredDocument) {
		String result = null;
		// if the document is empty, then there will be no nodes,
		// so no need to continue.
		IStructuredDocumentRegionList nodes = structuredDocument.getRegionList();
		if (nodes.getLength() > 0) {
			IStructuredDocumentRegion node = null;
			// skip any initial whitespace
			// Note that @charset "encodingname";
			// must appear at very beginning of document,
			// to be valid.
			// May have to test with "damaged" files (e.g.
			// beginning EOLs, etc., to verify this works
			// as expected.
			for (int i = 0; i < nodes.getLength(); i++) {
				node = nodes.item(i);
				if (getType(node) != CSSRegionContexts.CSS_S) {
					break;
				}
			}
			Iterator regions = node.getRegions().iterator();
			ITextRegion region = getNextRegionOfType(CSSRegionContexts.CSS_CHARSET, regions);
			if (region != null) {
				ITextRegion valueRegion = getNextRegionOfType(CSSRegionContexts.CSS_STRING, regions);
				if (valueRegion == null) {
					// if didn't find the region, its probably due to ill
					// formed input, such as
					// @charset "ISO-8859-6;
					// so we'll try again for "unknown" region.
					// If that fails, we'll give up?
					regions = node.getRegions().iterator();
					region = getNextRegionOfType(CSSRegionContexts.CSS_CHARSET, regions);
					if (region != null) {
						valueRegion = getNextRegionOfType(CSSRegionContexts.CSS_UNKNOWN, regions);
						if (valueRegion != null) {
							result = node.getText(valueRegion);
						}
					}
				}
				else {
					result = node.getText(valueRegion);
				}
				result = StringUtils.stripNonLetterDigits(result);
			}
		}
		return result;
	}

	public String getEncodingName(IDocument document) {
		String enc = null;
		if (document instanceof IStructuredDocument) {
			enc = getEncodingName((IStructuredDocument) document);
		}
		else {
			// TODO Important: need to implement some "raw" parser for
			// IDocument level
		}
		return enc;
	}

	private String getType(IStructuredDocumentRegion node) {
		if (node == null)
			return null;
		ITextRegionList regions = node.getRegions();
		if (regions == null || regions.size() == 0)
			return null;
		ITextRegion region = regions.get(0);
		String result = region.getType();
		return result;
	}

	private ITextRegion getNextRegionOfType(String type, Iterator regions) {
		if (type == null)
			return null;
		if (regions == null)
			return null;
		ITextRegion result = null;
		while (regions.hasNext()) {
			ITextRegion region = (ITextRegion) regions.next();
			if (region.getType() == type) {
				result = region;
				break;
			}
		}
		return result;
	}

	/**
	 * 
	 */

	public void set(IDocument document) {
		set(new DocumentReader(document, 0));

	}

}

Back to the top