Skip to main content

This CGIT instance is deprecated, and repositories have been moved to Gitlab or Github. See the repository descriptions for specific locations.

summaryrefslogtreecommitdiffstats
blob: 9f469622df073f57c677270667689294f60daad8 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
/*******************************************************************************
 * Copyright (c) 2003, 2004 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors:
 * IBM Corporation - initial API and implementation
 *******************************************************************************/
/*
 * Created on May 20, 2004
 *
 * TODO To change the template for this generated file go to
 * Window - Preferences - Java - Code Generation - Code and Comments
 */
package org.eclipse.wst.common.internal.emf.resource;

/**
 * Escapes text so that it can be written as XML text content or as an attribute value.
 * <p>
 * The five predefined XML entities are written as named entity references; every other
 * character outside the printable range is written as a decimal character reference.
 * Decoding is not implemented: {@link #decode(String)} returns its argument unchanged.
 * 
 * @author mdelder
 */
public class XMLEncoderDecoder {

	/** First UTF-16 high (leading) surrogate code unit. */
	private static final int MIN_HIGH_SURROGATE = 0xD800;

	/** Last UTF-16 high (leading) surrogate code unit. */
	private static final int MAX_HIGH_SURROGATE = 0xDBFF;

	/** First UTF-16 low (trailing) surrogate code unit. */
	private static final int MIN_LOW_SURROGATE = 0xDC00;

	/** Last UTF-16 low (trailing) surrogate code unit. */
	private static final int MAX_LOW_SURROGATE = 0xDFFF;

	/** First code point that needs a surrogate pair in UTF-16. */
	private static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000;

	/**
	 * Creates an encoder. The instance carries no per-call state.
	 */
	public XMLEncoderDecoder() {
		super();
	}

	/**
	 * Identifies the last printable character in the Unicode range that is supported by the
	 * encoding used with this serializer. For 8-bit encodings this will be either 0x7E or 0xFF. For
	 * 16-bit encodings this will be 0xFFFF. Characters that are not printable will be escaped using
	 * character references. This class never reassigns the field, so the limit is 0x7E here.
	 */
	private int _lastPrintable = 0x7E;

	/** Lazily created instance used by {@link #escape(String)}. */
	protected static XMLEncoderDecoder _singleton;


	/**
	 * Returns a decoded version of the value. Decoding is not implemented, so the argument is
	 * returned as is.
	 * 
	 * @param value
	 *            The string to decode
	 * @return <code>value</code>, unchanged
	 */
	public String decode(String value) {
		// NOT_IMPLEMENTED
		return value;
	}


	/**
	 * Escapes a character array so it may be printed as text content or attribute value. Non
	 * printable characters are escaped using character references. Where the format specifies a
	 * default entity reference, that reference is used (e.g. <tt>&amp;lt;</tt>). A valid UTF-16
	 * surrogate pair is written as a single reference to the code point it represents.
	 * 
	 * @param value
	 *            The characters to escape
	 * @return <code>value</code> itself when nothing had to be escaped, otherwise a new array
	 *         holding the escaped text
	 * @throws NullPointerException
	 *             if <code>value</code> is null
	 */
	public char[] encode(char[] value) {
		StringBuffer escaped = escapeChars(value);
		if (escaped == null)
			return value;
		char[] result = new char[escaped.length()];
		escaped.getChars(0, escaped.length(), result, 0);
		return result;
	}

	/**
	 * Escapes a string so it may be printed as text content or attribute value. Non printable
	 * characters are escaped using character references. Where the format specifies a default
	 * entity reference, that reference is used (e.g. <tt>&amp;lt;</tt>). A valid UTF-16 surrogate
	 * pair is written as a single reference to the code point it represents.
	 * 
	 * @param value
	 *            The string to escape
	 * @return the escaped string; <code>value</code> itself when nothing had to be escaped
	 * @throws NullPointerException
	 *             if <code>value</code> is null
	 */
	public String encode(String value) {
		StringBuffer escaped = escapeChars(value.toCharArray());
		return escaped == null ? value : escaped.toString();
	}

	/**
	 * Escapes a string using a shared encoder instance, which is created on first use.
	 * No synchronization is performed around that creation.
	 * 
	 * @param value
	 *            The string to escape
	 * @return the escaped string
	 * @throws NullPointerException
	 *             if <code>value</code> is null
	 */
	public static String escape(String value) {
		if (_singleton == null) {
			_singleton = new XMLEncoderDecoder();
		}
		return _singleton.encode(value);
	}

	/**
	 * Returns the suitable entity reference for this character value, or null if no such entity
	 * exists. Calling this method with <tt>'&amp;'</tt> will return <tt>"amp"</tt>; the caller
	 * adds the surrounding <tt>'&amp;'</tt> and <tt>';'</tt>.
	 * 
	 * @param ch
	 *            Character value
	 * @return Character entity name, or null
	 */
	protected String getEntityRef(char ch) {
		// Encode special XML characters into the equivalent character
		// references.
		// These five are defined by default for all XML documents.
		switch (ch) {
			case '<' :
				return "lt";//$NON-NLS-1$
			case '>' :
				return "gt";//$NON-NLS-1$
			case '"' :
				return "quot";//$NON-NLS-1$
			case '\'' :
				return "apos";//$NON-NLS-1$
			case '&' :
				return "amp";//$NON-NLS-1$
		}
		return null;
	}

	/**
	 * Shared escaping loop behind both <code>encode</code> overloads.
	 * 
	 * @param chars
	 *            The characters to escape
	 * @return the escaped text, or null when no character needed escaping
	 */
	private StringBuffer escapeChars(char[] chars) {
		// Allocated only once the first character that needs escaping is found.
		StringBuffer out = null;
		for (int i = 0; i < chars.length; ++i) {
			char ch = chars[i];
			// If there is a suitable entity reference for this character, it
			// takes precedence over everything else.
			String entityRef = getEntityRef(ch);
			if (entityRef == null && isPrintable(ch)) {
				if (out != null)
					out.append(ch);
				continue;
			}
			if (out == null) {
				out = new StringBuffer(chars.length + 16);
				// Everything before this index was copied through verbatim.
				out.append(chars, 0, i);
			}
			if (entityRef != null) {
				out.append('&');
				out.append(entityRef);
				out.append(';');
				continue;
			}
			int codePoint = ch;
			if (isHighSurrogate(ch) && i + 1 < chars.length && isLowSurrogate(chars[i + 1])) {
				// A character reference must name the code point, not the two
				// UTF-16 code units; surrogate values are not legal XML characters.
				codePoint = toCodePoint(ch, chars[i + 1]);
				++i;
			}
			out.append("&#");//$NON-NLS-1$
			out.append(Integer.toString(codePoint));
			out.append(';');
		}
		return out;
	}

	/**
	 * Tells whether a character is written as is. That is the case for tab, line feed and
	 * carriage return, and for characters from space up to the printable limit. 0xF7 is always
	 * escaped, even if the limit were raised to include it.
	 */
	private boolean isPrintable(char ch) {
		if (ch == '\n' || ch == '\r' || ch == '\t')
			return true;
		return ch >= ' ' && ch <= _lastPrintable && ch != 0xF7;
	}

	/** Tells whether the code unit is the leading half of a surrogate pair. */
	private static boolean isHighSurrogate(char ch) {
		return ch >= MIN_HIGH_SURROGATE && ch <= MAX_HIGH_SURROGATE;
	}

	/** Tells whether the code unit is the trailing half of a surrogate pair. */
	private static boolean isLowSurrogate(char ch) {
		return ch >= MIN_LOW_SURROGATE && ch <= MAX_LOW_SURROGATE;
	}

	/** Combines a high and a low surrogate into the code point they encode. */
	private static int toCodePoint(char high, char low) {
		return MIN_SUPPLEMENTARY_CODE_POINT + ((high - MIN_HIGH_SURROGATE) << 10) + (low - MIN_LOW_SURROGATE);
	}
}

Back to the top