1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
|
/*******************************************************************************
* Copyright (c) 2004 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM Corporation - initial API and implementation
*******************************************************************************/
package org.eclipse.wst.css.core.internal.encoding;
import java.util.Iterator;
import org.eclipse.jface.text.IDocument;
import org.eclipse.wst.css.core.internal.contenttype.CSSResourceEncodingDetector;
import org.eclipse.wst.css.core.internal.parserz.CSSRegionContexts;
import org.eclipse.wst.sse.core.internal.document.DocumentReader;
import org.eclipse.wst.sse.core.internal.document.IDocumentCharsetDetector;
import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocument;
import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocumentRegion;
import org.eclipse.wst.sse.core.internal.provisional.text.IStructuredDocumentRegionList;
import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegion;
import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegionList;
import org.eclipse.wst.sse.core.utils.StringUtils;
public class CSSDocumentCharsetDetector extends CSSResourceEncodingDetector implements IDocumentCharsetDetector {
public String getEncodingName(IStructuredDocument structuredDocument) {
String result = null;
// if the document is empty, then there will be no nodes,
// so no need to continue.
IStructuredDocumentRegionList nodes = structuredDocument.getRegionList();
if (nodes.getLength() > 0) {
IStructuredDocumentRegion node = null;
// skip any initial whitespace
// Note that @charset "encodingname";
// must appear at very beginning of document,
// to be valid.
// May have to test with "damaged" files (e.g.
// beginning EOLs, etc., to verify this works
// as expected.
for (int i = 0; i < nodes.getLength(); i++) {
node = nodes.item(i);
if (getType(node) != CSSRegionContexts.CSS_S) {
break;
}
}
Iterator regions = node.getRegions().iterator();
ITextRegion region = getNextRegionOfType(CSSRegionContexts.CSS_CHARSET, regions);
if (region != null) {
ITextRegion valueRegion = getNextRegionOfType(CSSRegionContexts.CSS_STRING, regions);
if (valueRegion == null) {
// if didn't find the region, its probably due to ill
// formed input, such as
// @charset "ISO-8859-6;
// so we'll try again for "unknown" region.
// If that fails, we'll give up?
regions = node.getRegions().iterator();
region = getNextRegionOfType(CSSRegionContexts.CSS_CHARSET, regions);
if (region != null) {
valueRegion = getNextRegionOfType(CSSRegionContexts.CSS_UNKNOWN, regions);
if (valueRegion != null) {
result = node.getText(valueRegion);
}
}
}
else {
result = node.getText(valueRegion);
}
result = StringUtils.stripNonLetterDigits(result);
}
}
return result;
}
public String getEncodingName(IDocument document) {
String enc = null;
if (document instanceof IStructuredDocument) {
enc = getEncodingName((IStructuredDocument) document);
}
else {
// TODO Important: need to implement some "raw" parser for
// IDocument level
}
return enc;
}
private String getType(IStructuredDocumentRegion node) {
if (node == null)
return null;
ITextRegionList regions = node.getRegions();
if (regions == null || regions.size() == 0)
return null;
ITextRegion region = regions.get(0);
String result = region.getType();
return result;
}
private ITextRegion getNextRegionOfType(String type, Iterator regions) {
if (type == null)
return null;
if (regions == null)
return null;
ITextRegion result = null;
while (regions.hasNext()) {
ITextRegion region = (ITextRegion) regions.next();
if (region.getType() == type) {
result = region;
break;
}
}
return result;
}
/**
*
*/
public void set(IDocument document) {
set(new DocumentReader(document, 0));
}
}
|