[88419] Encoding code should move to internal, and SSE
diff --git a/bundles/org.eclipse.wst.sse.core/.classpath b/bundles/org.eclipse.wst.sse.core/.classpath
index 275b34c..496b784 100644
--- a/bundles/org.eclipse.wst.sse.core/.classpath
+++ b/bundles/org.eclipse.wst.sse.core/.classpath
@@ -3,5 +3,6 @@
<classpathentry kind="src" path="src/"/>
<classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/>
<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
+ <classpathentry kind="src" path="src-encoding"/>
<classpathentry kind="output" path="bin"/>
</classpath>
diff --git a/bundles/org.eclipse.wst.sse.core/config/charset.properties b/bundles/org.eclipse.wst.sse.core/config/charset.properties
new file mode 100644
index 0000000..9e00ac8
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/config/charset.properties
@@ -0,0 +1,83 @@
+! the number of codeset names
+totalnumber=24
+
+! Only translate the xx.label strings. Do not translate the xx.iana or xx.java strings.
+
+codeset.0.label=ISO 10646/Unicode(UTF-8)
+codeset.0.iana=UTF-8
+
+codeset.1.label=ISO 10646/Unicode(UTF-16) Big Endian
+codeset.1.iana=UTF-16
+
+codeset.2.label=ISO 10646/Unicode(UTF-16BE) Big Endian
+codeset.2.iana=UTF-16BE
+
+codeset.3.label=ISO 10646/Unicode(UTF-16LE) Little Endian
+codeset.3.iana=UTF-16LE
+
+codeset.4.label=US ASCII
+codeset.4.iana=US-ASCII
+
+codeset.5.label=ISO Latin-1
+codeset.5.iana=ISO-8859-1
+
+! (Slavic: Czech, Croat, German, Hungarian, Polish, Romanian, Slovak, Slovenian)
+codeset.6.label=Central/East European (Slavic)
+codeset.6.iana=ISO-8859-2
+
+! (Esperanto, Galician, Maltese, Turkish)
+codeset.7.label=Southern European
+codeset.7.iana=ISO-8859-3
+
+codeset.8.label=Arabic, Logical
+codeset.8.iana=ISO-8859-6
+
+codeset.9.label=Arabic
+codeset.9.iana=windows-1256
+
+codeset.10.label=Chinese, National Standard
+codeset.10.iana=GB18030
+
+codeset.11.label=Traditional Chinese, Big5
+codeset.11.iana=BIG5
+
+! (Estonian, Latvian, Lithuanian)
+codeset.12.label=Baltic, ISO-8859-4
+codeset.12.iana=ISO-8859-4
+
+! (Bulgarian, Byelorussian, Macedonian, Serbian, Ukrainian)
+codeset.13.label=Cyrillic, ISO-8859-5
+codeset.13.iana=ISO-8859-5
+
+codeset.14.label=Greek
+codeset.14.iana=ISO-8859-7
+
+codeset.15.label=Hebrew, Visual
+codeset.15.iana=ISO-8859-8
+
+! label=Hebrew, Logical
+! iana=ISO-8859-8-I
+
+codeset.16.label=Hebrew
+codeset.16.iana=windows-1255
+
+codeset.17.label=Japanese, EUC-JP
+codeset.17.iana=EUC-JP
+
+codeset.18.label=Japanese, ISO 2022
+codeset.18.iana=ISO-2022-JP
+
+codeset.19.label=Japanese, Shift-JIS
+codeset.19.iana=Shift_JIS
+
+codeset.20.label=Korean, EUC-KR
+codeset.20.iana=EUC-KR
+
+codeset.21.label=Korean, ISO 2022
+codeset.21.iana=ISO-2022-KR
+
+codeset.22.label=Thai, TISI
+codeset.22.iana=TIS-620
+
+codeset.23.label=Turkish
+codeset.23.iana=ISO-8859-9
diff --git a/bundles/org.eclipse.wst.sse.core/config/charset_de.properties b/bundles/org.eclipse.wst.sse.core/config/charset_de.properties
new file mode 100644
index 0000000..2989b66
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/config/charset_de.properties
@@ -0,0 +1,83 @@
+! the number of codeset names
+totalnumber=24
+
+! Only translate the xx.label strings. Do not translate the xx.iana or xx.java strings.
+
+codeset.0.label=ISO 10646/Unicode (UTF-8)
+codeset.0.iana=UTF-8
+
+codeset.1.label=ISO 10646/Unicode (UTF-16) Big Endian
+codeset.1.iana=UTF-16
+
+codeset.2.label=ISO 10646/Unicode (UTF-16BE) Big Endian
+codeset.2.iana=UTF-16BE
+
+codeset.3.label=ISO 10646/Unicode (UTF-16LE) Little Endian
+codeset.3.iana=UTF-16LE
+
+codeset.4.label=US ASCII
+codeset.4.iana=US-ASCII
+
+codeset.5.label=ISO Lateinischer Zeichensatz 1
+codeset.5.iana=ISO-8859-1
+
+! (Slavic: Czech, Croat, German, Hungarian, Polish, Romanian, Slovak, Slovenian)
+codeset.6.label=Zentral-/Osteuropa (Slawisch)
+codeset.6.iana=ISO-8859-2
+
+! (Esperanto, Galician, Maltese, Turkish)
+codeset.7.label=S\u00fcdeuropa
+codeset.7.iana=ISO-8859-3
+
+codeset.8.label=Arabisch (logisch)
+codeset.8.iana=ISO-8859-6
+
+codeset.9.label=Arabisch
+codeset.9.iana=windows-1256
+
+codeset.10.label=Chinesisch (Nationalstandard)
+codeset.10.iana=GB18030
+
+codeset.11.label=Traditionelles Chinesisch (Big5)
+codeset.11.iana=BIG5
+
+! (Estonian, Latvian, Lithuanian)
+codeset.12.label=Kyrillisch, ISO-8859-4
+codeset.12.iana=ISO-8859-4
+
+! (Bulgarian, Byelorussian, Macedonian, Serbian, Ukrainian)
+codeset.13.label=Kyrillisch, ISO-8859-5
+codeset.13.iana=ISO-8859-5
+
+codeset.14.label=Griechisch
+codeset.14.iana=ISO-8859-7
+
+codeset.15.label=Hebr\u00e4isch (grafisch orientiert)
+codeset.15.iana=ISO-8859-8
+
+! label=Hebrew, Logical
+! iana=ISO-8859-8-I
+
+codeset.16.label=Hebr\u00e4isch
+codeset.16.iana=windows-1255
+
+codeset.17.label=Japanisch, EUC-JP
+codeset.17.iana=EUC-JP
+
+codeset.18.label=Japanisch, ISO 2022
+codeset.18.iana=ISO-2022-JP
+
+codeset.19.label=Japanisch, Shift-JIS
+codeset.19.iana=Shift_JIS
+
+codeset.20.label=Koreanisch, EUC-KR
+codeset.20.iana=EUC-KR
+
+codeset.21.label=Koreanisch, ISO 2022
+codeset.21.iana=ISO-2022-KR
+
+codeset.22.label=Thail\u00e4ndisch, TISI
+codeset.22.iana=TIS-620
+
+codeset.23.label=T\u00fcrkisch
+codeset.23.iana=ISO-8859-9
diff --git a/bundles/org.eclipse.wst.sse.core/config/charset_es.properties b/bundles/org.eclipse.wst.sse.core/config/charset_es.properties
new file mode 100644
index 0000000..efaa0f4
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/config/charset_es.properties
@@ -0,0 +1,83 @@
+! the number of codeset names
+totalnumber=24
+
+! Only translate the xx.label strings. Do not translate the xx.iana or xx.java strings.
+
+codeset.0.label=ISO 10646/Unicode(UTF-8)
+codeset.0.iana=UTF-8
+
+codeset.1.label=ISO 10646/Unicode(UTF-16) Big Endian
+codeset.1.iana=UTF-16
+
+codeset.2.label=ISO 10646/Unicode(UTF-16BE) Big Endian
+codeset.2.iana=UTF-16BE
+
+codeset.3.label=ISO 10646/Unicode(UTF-16LE) Little Endian
+codeset.3.iana=UTF-16LE
+
+codeset.4.label=US ASCII
+codeset.4.iana=US-ASCII
+
+codeset.5.label=ISO Latin-1
+codeset.5.iana=ISO-8859-1
+
+! (Slavic: Czech, Croat, German, Hungarian, Polish, Romanian, Slovak, Slovenian)
+codeset.6.label=Europa central/oriental (Eslavo)
+codeset.6.iana=ISO-8859-2
+
+! (Esperanto, Galician, Maltese, Turkish)
+codeset.7.label=Europa meridional
+codeset.7.iana=ISO-8859-3
+
+codeset.8.label=\u00c1rabe, l\u00f3gico
+codeset.8.iana=ISO-8859-6
+
+codeset.9.label=\u00c1rabe
+codeset.9.iana=windows-1256
+
+codeset.10.label=Chino, est\u00e1ndar nacional
+codeset.10.iana=GB18030
+
+codeset.11.label=Chino tradicional, Big5
+codeset.11.iana=BIG5
+
+! (Estonian, Latvian, Lithuanian)
+codeset.12.label=Cir\u00edlico, ISO-8859-4
+codeset.12.iana=ISO-8859-4
+
+! (Bulgarian, Byelorussian, Macedonian, Serbian, Ukrainian)
+codeset.13.label=Cir\u00edlico, ISO-8859-5
+codeset.13.iana=ISO-8859-5
+
+codeset.14.label=Griego
+codeset.14.iana=ISO-8859-7
+
+codeset.15.label=Hebreo, visual
+codeset.15.iana=ISO-8859-8
+
+! label=Hebrew, Logical
+! iana=ISO-8859-8-I
+
+codeset.16.label=Hebreo
+codeset.16.iana=windows-1255
+
+codeset.17.label=Japon\u00e9s, EUC-JP
+codeset.17.iana=EUC-JP
+
+codeset.18.label=Japon\u00e9s, ISO 2022
+codeset.18.iana=ISO-2022-JP
+
+codeset.19.label=Japon\u00e9s, Shift-JIS
+codeset.19.iana=Shift_JIS
+
+codeset.20.label=Coreano, EUC-KR
+codeset.20.iana=EUC-KR
+
+codeset.21.label=Coreano, ISO 2022
+codeset.21.iana=ISO-2022-KR
+
+codeset.22.label=Tailand\u00e9s, TISI
+codeset.22.iana=TIS-620
+
+codeset.23.label=Turco
+codeset.23.iana=ISO-8859-9
diff --git a/bundles/org.eclipse.wst.sse.core/config/charset_fr.properties b/bundles/org.eclipse.wst.sse.core/config/charset_fr.properties
new file mode 100644
index 0000000..9235094
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/config/charset_fr.properties
@@ -0,0 +1,83 @@
+! the number of codeset names
+totalnumber=24
+
+! Only translate the xx.label strings. Do not translate the xx.iana or xx.java strings.
+
+codeset.0.label=ISO 10646/Unicode(UTF-8)
+codeset.0.iana=UTF-8
+
+codeset.1.label=ISO 10646/Unicode(UTF-16) Big Endian
+codeset.1.iana=UTF-16
+
+codeset.2.label=ISO 10646/Unicode(UTF-16BE) Big Endian
+codeset.2.iana=UTF-16BE
+
+codeset.3.label=ISO 10646/Unicode(UTF-16LE) Little Endian
+codeset.3.iana=UTF-16LE
+
+codeset.4.label=US ASCII
+codeset.4.iana=US-ASCII
+
+codeset.5.label=ISO Latin-1
+codeset.5.iana=ISO-8859-1
+
+! (Slavic: Czech, Croat, German, Hungarian, Polish, Romanian, Slovak, Slovenian)
+codeset.6.label=Europe centrale et de l'Est (slave)
+codeset.6.iana=ISO-8859-2
+
+! (Esperanto, Galician, Maltese, Turkish)
+codeset.7.label=Europe m\u00e9ridionale
+codeset.7.iana=ISO-8859-3
+
+codeset.8.label=Arabe, logique
+codeset.8.iana=ISO-8859-6
+
+codeset.9.label=Arabe
+codeset.9.iana=windows-1256
+
+codeset.10.label=Chinois simplifi\u00e9
+codeset.10.iana=GB18030
+
+codeset.11.label=Chinois traditionnel, Big5
+codeset.11.iana=BIG5
+
+! (Estonian, Latvian, Lithuanian)
+codeset.12.label=Cyrillique, ISO-8859-4
+codeset.12.iana=ISO-8859-4
+
+! (Bulgarian, Byelorussian, Macedonian, Serbian, Ukrainian)
+codeset.13.label=Cyrillique, ISO-8859-5
+codeset.13.iana=ISO-8859-5
+
+codeset.14.label=Grec
+codeset.14.iana=ISO-8859-7
+
+codeset.15.label=H\u00e9breu, visuel
+codeset.15.iana=ISO-8859-8
+
+! label=Hebrew, Logical
+! iana=ISO-8859-8-I
+
+codeset.16.label=H\u00e9breu
+codeset.16.iana=windows-1255
+
+codeset.17.label=Japonais, EUC-JP
+codeset.17.iana=EUC-JP
+
+codeset.18.label=Japonais, ISO 2022
+codeset.18.iana=ISO-2022-JP
+
+codeset.19.label=Japonais, Shift-JIS
+codeset.19.iana=Shift_JIS
+
+codeset.20.label=Cor\u00e9en, EUC-KR
+codeset.20.iana=EUC-KR
+
+codeset.21.label=Cor\u00e9en, ISO 2022
+codeset.21.iana=ISO-2022-KR
+
+codeset.22.label=Tha\u00ef, TISI
+codeset.22.iana=TIS-620
+
+codeset.23.label=Turc
+codeset.23.iana=ISO-8859-9
diff --git a/bundles/org.eclipse.wst.sse.core/config/charset_it.properties b/bundles/org.eclipse.wst.sse.core/config/charset_it.properties
new file mode 100644
index 0000000..89fcb04
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/config/charset_it.properties
@@ -0,0 +1,83 @@
+! the number of codeset names
+totalnumber=24
+
+! Only translate the xx.label strings. Do not translate the xx.iana or xx.java strings.
+
+codeset.0.label=ISO 10646/Unicode(UTF-8)
+codeset.0.iana=UTF-8
+
+codeset.1.label=ISO 10646/Unicode(UTF-16) Big Endian
+codeset.1.iana=UTF-16
+
+codeset.2.label=ISO 10646/Unicode(UTF-16BE) Big Endian
+codeset.2.iana=UTF-16BE
+
+codeset.3.label=ISO 10646/Unicode(UTF-16LE) Little Endian
+codeset.3.iana=UTF-16LE
+
+codeset.4.label=US ASCII
+codeset.4.iana=US-ASCII
+
+codeset.5.label=ISO Latin-1
+codeset.5.iana=ISO-8859-1
+
+! (Slavic: Czech, Croat, German, Hungarian, Polish, Romanian, Slovak, Slovenian)
+codeset.6.label=Europa centrale/orientale (Slavo)
+codeset.6.iana=ISO-8859-2
+
+! (Esperanto, Galician, Maltese, Turkish)
+codeset.7.label=Europa del sud
+codeset.7.iana=ISO-8859-3
+
+codeset.8.label=Arabo, Logico
+codeset.8.iana=ISO-8859-6
+
+codeset.9.label=Arabo
+codeset.9.iana=windows-1256
+
+codeset.10.label=Cinese, Nazionale standard
+codeset.10.iana=GB18030
+
+codeset.11.label=Cinese tradizionale, Big5
+codeset.11.iana=BIG5
+
+! (Estonian, Latvian, Lithuanian)
+codeset.12.label=Cirillico, ISO-8859-4
+codeset.12.iana=ISO-8859-4
+
+! (Bulgarian, Byelorussian, Macedonian, Serbian, Ukrainian)
+codeset.13.label=Cirillico, ISO-8859-5
+codeset.13.iana=ISO-8859-5
+
+codeset.14.label=Greco
+codeset.14.iana=ISO-8859-7
+
+codeset.15.label=Ebraico, visivo
+codeset.15.iana=ISO-8859-8
+
+! label=Hebrew, Logical
+! iana=ISO-8859-8-I
+
+codeset.16.label=Ebraico
+codeset.16.iana=windows-1255
+
+codeset.17.label=Giapponese, EUC-JP
+codeset.17.iana=EUC-JP
+
+codeset.18.label=Giapponese, ISO 2022
+codeset.18.iana=ISO-2022-JP
+
+codeset.19.label=Giapponese, Shift-JIS
+codeset.19.iana=Shift_JIS
+
+codeset.20.label=Coreano, EUC-KR
+codeset.20.iana=EUC-KR
+
+codeset.21.label=Coreano, ISO 2022
+codeset.21.iana=ISO-2022-KR
+
+codeset.22.label=Tailandese, TISI
+codeset.22.iana=TIS-620
+
+codeset.23.label=Turco
+codeset.23.iana=ISO-8859-9
diff --git a/bundles/org.eclipse.wst.sse.core/config/charset_ja.properties b/bundles/org.eclipse.wst.sse.core/config/charset_ja.properties
new file mode 100644
index 0000000..5a0bf8f
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/config/charset_ja.properties
@@ -0,0 +1,83 @@
+! the number of codeset names
+totalnumber=24
+
+! Only translate the xx.label strings. Do not translate the xx.iana or xx.java strings.
+
+codeset.0.label=ISO 10646/ \u30e6\u30cb\u30b3\u30fc\u30c9 (UTF-8)
+codeset.0.iana=UTF-8
+
+codeset.1.label=ISO 10646/ \u30e6\u30cb\u30b3\u30fc\u30c9 (UTF-16) \u30d3\u30c3\u30b0\u30fb\u30a8\u30f3\u30c7\u30a3\u30a2\u30f3
+codeset.1.iana=UTF-16
+
+codeset.2.label=ISO 10646/ \u30e6\u30cb\u30b3\u30fc\u30c9 (UTF-16BE) \u30d3\u30c3\u30b0\u30fb\u30a8\u30f3\u30c7\u30a3\u30a2\u30f3
+codeset.2.iana=UTF-16BE
+
+codeset.3.label=ISO 10646/ \u30e6\u30cb\u30b3\u30fc\u30c9 (UTF-16LE) \u30ea\u30c8\u30eb\u30fb\u30a8\u30f3\u30c7\u30a3\u30a2\u30f3
+codeset.3.iana=UTF-16LE
+
+codeset.4.label=US ASCII
+codeset.4.iana=US-ASCII
+
+codeset.5.label=ISO Latin-1
+codeset.5.iana=ISO-8859-1
+
+! (Slavic: Czech, Croat, German, Hungarian, Polish, Romanian, Slovak, Slovenian)
+codeset.6.label=\u4e2d\u592e/\u6771\u30e8\u30fc\u30ed\u30c3\u30d1 (\u30b9\u30e9\u30d6\u8a9e) (Slavic)
+codeset.6.iana=ISO-8859-2
+
+! (Esperanto, Galician, Maltese, Turkish)
+codeset.7.label=\u5357\u30e8\u30fc\u30ed\u30c3\u30d1
+codeset.7.iana=ISO-8859-3
+
+codeset.8.label=\u30a2\u30e9\u30d3\u30a2\u8a9e\u3001\u8ad6\u7406
+codeset.8.iana=ISO-8859-6
+
+codeset.9.label=\u30a2\u30e9\u30d3\u30a2\u8a9e
+codeset.9.iana=windows-1256
+
+codeset.10.label=\u4e2d\u56fd\u8a9e\u3001\u6a19\u6e96
+codeset.10.iana=GB18030
+
+codeset.11.label=\u4e2d\u56fd\u8a9e (\u7e41\u4f53\u5b57), Big5
+codeset.11.iana=BIG5
+
+! (Estonian, Latvian, Lithuanian)
+codeset.12.label=\u30ad\u30ea\u30eb\u6587\u5b57\u3001ISO-8859-4
+codeset.12.iana=ISO-8859-4
+
+! (Bulgarian, Byelorussian, Macedonian, Serbian, Ukrainian)
+codeset.13.label=\u30ad\u30ea\u30eb\u6587\u5b57\u3001ISO-8859-5
+codeset.13.iana=ISO-8859-5
+
+codeset.14.label=\u30ae\u30ea\u30b7\u30e3\u8a9e
+codeset.14.iana=ISO-8859-7
+
+codeset.15.label=\u30d8\u30d6\u30e9\u30a4\u8a9e\u3001\u30d3\u30b8\u30e5\u30a2\u30eb
+codeset.15.iana=ISO-8859-8
+
+! label=Hebrew, Logical
+! iana=ISO-8859-8-I
+
+codeset.16.label=\u30d8\u30d6\u30e9\u30a4\u8a9e
+codeset.16.iana=windows-1255
+
+codeset.17.label=\u65e5\u672c\u8a9e\u3001EUC-JP
+codeset.17.iana=EUC-JP
+
+codeset.18.label=\u65e5\u672c\u8a9e\u3001ISO 2022
+codeset.18.iana=ISO-2022-JP
+
+codeset.19.label=\u65e5\u672c\u8a9e\u3001\u30b7\u30d5\u30c8 JIS
+codeset.19.iana=Shift_JIS
+
+codeset.20.label=\u97d3\u56fd\u8a9e\u3001EUC-KR
+codeset.20.iana=EUC-KR
+
+codeset.21.label=\u97d3\u56fd\u8a9e\u3001ISO 2022
+codeset.21.iana=ISO-2022-KR
+
+codeset.22.label=\u30bf\u30a4\u8a9e\u3001TISI
+codeset.22.iana=TIS-620
+
+codeset.23.label=\u30c8\u30eb\u30b3\u8a9e
+codeset.23.iana=ISO-8859-9
diff --git a/bundles/org.eclipse.wst.sse.core/config/charset_ko.properties b/bundles/org.eclipse.wst.sse.core/config/charset_ko.properties
new file mode 100644
index 0000000..edb5a44
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/config/charset_ko.properties
@@ -0,0 +1,83 @@
+! the number of codeset names
+totalnumber=24
+
+! Only translate the xx.label strings. Do not translate the xx.iana or xx.java strings.
+
+codeset.0.label=ISO 10646/\uc720\ub2c8\ucf54\ub4dc(UTF-8)
+codeset.0.iana=UTF-8
+
+codeset.1.label=ISO 10646/\uc720\ub2c8\ucf54\ub4dc(UTF-16) Big Endian
+codeset.1.iana=UTF-16
+
+codeset.2.label=ISO 10646/\uc720\ub2c8\ucf54\ub4dc(UTF-16BE) Big Endian
+codeset.2.iana=UTF-16BE
+
+codeset.3.label=ISO 10646/\uc720\ub2c8\ucf54\ub4dc(UTF-16LE) Little Endian
+codeset.3.iana=UTF-16LE
+
+codeset.4.label=US ASCII
+codeset.4.iana=US-ASCII
+
+codeset.5.label=ISO \ub77c\ud2f4-1
+codeset.5.iana=ISO-8859-1
+
+! (Slavic: Czech, Croat, German, Hungarian, Polish, Romanian, Slovak, Slovenian)
+codeset.6.label=\uc911\uc559/\ub3d9\ubd80 \uc720\ub7fd(\uc2ac\ub77c\ube0c\uc5b4)
+codeset.6.iana=ISO-8859-2
+
+! (Esperanto, Galician, Maltese, Turkish)
+codeset.7.label=\ub0a8\ubd80 \uc720\ub7fd
+codeset.7.iana=ISO-8859-3
+
+codeset.8.label=\uc544\ub78d, \ub17c\ub9ac\uc801
+codeset.8.iana=ISO-8859-6
+
+codeset.9.label=\uc544\ub78d
+codeset.9.iana=windows-1256
+
+codeset.10.label=\uc911\uad6d\uc5b4, \uc790\uad6d \ud45c\uc900
+codeset.10.iana=GB18030
+
+codeset.11.label=\ub300\ub9cc\uc5b4, Big5
+codeset.11.iana=BIG5
+
+! (Estonian, Latvian, Lithuanian)
+codeset.12.label=\uc2dc\ub9b4\uc5b4, ISO-8859-4
+codeset.12.iana=ISO-8859-4
+
+! (Bulgarian, Byelorussian, Macedonian, Serbian, Ukrainian)
+codeset.13.label=\uc2dc\ub9b4\uc5b4, ISO-8859-5
+codeset.13.iana=ISO-8859-5
+
+codeset.14.label=\uadf8\ub9ac\uc2a4\uc5b4
+codeset.14.iana=ISO-8859-7
+
+codeset.15.label=\ud788\ube0c\ub9ac\uc5b4, \ube44\uc8fc\uc5bc
+codeset.15.iana=ISO-8859-8
+
+! label=Hebrew, Logical
+! iana=ISO-8859-8-I
+
+codeset.16.label=\ud788\ube0c\ub9ac\uc5b4
+codeset.16.iana=windows-1255
+
+codeset.17.label=\uc77c\ubcf8\uc5b4, EUC-JP
+codeset.17.iana=EUC-JP
+
+codeset.18.label=\uc77c\ubcf8\uc5b4, ISO 2022
+codeset.18.iana=ISO-2022-JP
+
+codeset.19.label=\uc77c\ubcf8\uc5b4, Shift-JIS
+codeset.19.iana=Shift_JIS
+
+codeset.20.label=\ud55c\uad6d\uc5b4, EUC-KR
+codeset.20.iana=EUC-KR
+
+codeset.21.label=\ud55c\uad6d\uc5b4, ISO 2022
+codeset.21.iana=ISO-2022-KR
+
+codeset.22.label=\ud0dc\uad6d\uc5b4, TISI
+codeset.22.iana=TIS-620
+
+codeset.23.label=\ud130\ud0a4\uc5b4
+codeset.23.iana=ISO-8859-9
diff --git a/bundles/org.eclipse.wst.sse.core/config/charset_pt_BR.properties b/bundles/org.eclipse.wst.sse.core/config/charset_pt_BR.properties
new file mode 100644
index 0000000..b17cb86
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/config/charset_pt_BR.properties
@@ -0,0 +1,83 @@
+! the number of codeset names
+totalnumber=24
+
+! Only translate the xx.label strings. Do not translate the xx.iana or xx.java strings.
+
+codeset.0.label=ISO 10646/Unicode(UTF-8)
+codeset.0.iana=UTF-8
+
+codeset.1.label=ISO 10646/Unicode(UTF-16) Big Endian
+codeset.1.iana=UTF-16
+
+codeset.2.label=ISO 10646/Unicode(UTF-16BE) Big Endian
+codeset.2.iana=UTF-16BE
+
+codeset.3.label=ISO 10646/Unicode(UTF-16LE) Little Endian
+codeset.3.iana=UTF-16LE
+
+codeset.4.label=US ASCII
+codeset.4.iana=US-ASCII
+
+codeset.5.label=ISO Latino-1
+codeset.5.iana=ISO-8859-1
+
+! (Slavic: Czech, Croat, German, Hungarian, Polish, Romanian, Slovak, Slovenian)
+codeset.6.label=Europeu Central/Leste (Eslavo)
+codeset.6.iana=ISO-8859-2
+
+! (Esperanto, Galician, Maltese, Turkish)
+codeset.7.label=Europeu Meridional
+codeset.7.iana=ISO-8859-3
+
+codeset.8.label=Ar\u00e1bico, L\u00f3gica
+codeset.8.iana=ISO-8859-6
+
+codeset.9.label=\u00c1rabe
+codeset.9.iana=windows-1256
+
+codeset.10.label=Chin\u00eas, Padr\u00e3o Nacional
+codeset.10.iana=GB18030
+
+codeset.11.label=Chin\u00eas Tradicional, Big5
+codeset.11.iana=BIG5
+
+! (Estonian, Latvian, Lithuanian)
+codeset.12.label=Cir\u00edlico, ISO-8859-4
+codeset.12.iana=ISO-8859-4
+
+! (Bulgarian, Byelorussian, Macedonian, Serbian, Ukrainian)
+codeset.13.label=Cir\u00edlico, ISO-8859-5
+codeset.13.iana=ISO-8859-5
+
+codeset.14.label=Grego
+codeset.14.iana=ISO-8859-7
+
+codeset.15.label=Hebraico, Visual
+codeset.15.iana=ISO-8859-8
+
+! label=Hebrew, Logical
+! iana=ISO-8859-8-I
+
+codeset.16.label=Hebraico
+codeset.16.iana=windows-1255
+
+codeset.17.label=Japon\u00eas, EUC-JP
+codeset.17.iana=EUC-JP
+
+codeset.18.label=Japon\u00eas, ISO 2022
+codeset.18.iana=ISO-2022-JP
+
+codeset.19.label=Japon\u00eas, Shift-JIS
+codeset.19.iana=Shift_JIS
+
+codeset.20.label=Coreano, EUC-KR
+codeset.20.iana=EUC-KR
+
+codeset.21.label=Coreano, ISO 2022
+codeset.21.iana=ISO-2022-KR
+
+codeset.22.label=Tailand\u00eas, TISI
+codeset.22.iana=TIS-620
+
+codeset.23.label=Turco
+codeset.23.iana=ISO-8859-9
diff --git a/bundles/org.eclipse.wst.sse.core/config/charset_zh_CN.properties b/bundles/org.eclipse.wst.sse.core/config/charset_zh_CN.properties
new file mode 100644
index 0000000..781f4c2
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/config/charset_zh_CN.properties
@@ -0,0 +1,83 @@
+! the number of codeset names
+totalnumber=24
+
+! Only translate the xx.label strings. Do not translate the xx.iana or xx.java strings.
+
+codeset.0.label=ISO 10646\uff0fUnicode\uff08UTF-8\uff09
+codeset.0.iana=UTF-8
+
+codeset.1.label=ISO 10646\uff0fUnicode\uff08UTF-16\uff09\u5927\u5c3e\u6570\u6cd5
+codeset.1.iana=UTF-16
+
+codeset.2.label=ISO 10646\uff0fUnicode\uff08UTF-16BE\uff09\u5927\u5c3e\u6570\u6cd5
+codeset.2.iana=UTF-16BE
+
+codeset.3.label=ISO 10646\uff0fUnicode\uff08UTF-16LE\uff09\u5c0f\u5c3e\u6570\u6cd5
+codeset.3.iana=UTF-16LE
+
+codeset.4.label=US ASCII
+codeset.4.iana=US-ASCII
+
+codeset.5.label=ISO Latin-1
+codeset.5.iana=ISO-8859-1
+
+! (Slavic: Czech, Croat, German, Hungarian, Polish, Romanian, Slovak, Slovenian)
+codeset.6.label=\u4e2d\u6b27\uff0f\u4e1c\u6b27\uff08\u65af\u62c9\u592b\u8bed\uff09
+codeset.6.iana=ISO-8859-2
+
+! (Esperanto, Galician, Maltese, Turkish)
+codeset.7.label=\u5357\u6b27
+codeset.7.iana=ISO-8859-3
+
+codeset.8.label=\u963f\u62c9\u4f2f\u8bed\uff0c\u903b\u8f91
+codeset.8.iana=ISO-8859-6
+
+codeset.9.label=\u963f\u62c9\u4f2f\u8bed
+codeset.9.iana=windows-1256
+
+codeset.10.label=\u4e2d\u6587\uff0c\u56fd\u5bb6\u6807\u51c6
+codeset.10.iana=GB18030
+
+codeset.11.label=\u7e41\u4f53\u4e2d\u6587\uff0cBig5
+codeset.11.iana=BIG5
+
+! (Estonian, Latvian, Lithuanian)
+codeset.12.label=Cyrillic\uff0cISO-8859-4
+codeset.12.iana=ISO-8859-4
+
+! (Bulgarian, Byelorussian, Macedonian, Serbian, Ukrainian)
+codeset.13.label=Cyrillic\uff0cISO-8859-5
+codeset.13.iana=ISO-8859-5
+
+codeset.14.label=\u5e0c\u814a\u8bed
+codeset.14.iana=ISO-8859-7
+
+codeset.15.label=\u5e0c\u4f2f\u83b1\u8bed\uff0c\u53ef\u89c6
+codeset.15.iana=ISO-8859-8
+
+! label=Hebrew, Logical
+! iana=ISO-8859-8-I
+
+codeset.16.label=\u5e0c\u4f2f\u83b1\u8bed
+codeset.16.iana=windows-1255
+
+codeset.17.label=\u65e5\u8bed\uff0cEUC-JP
+codeset.17.iana=EUC-JP
+
+codeset.18.label=\u65e5\u8bed\uff0cISO 2022
+codeset.18.iana=ISO-2022-JP
+
+codeset.19.label=\u65e5\u8bed\uff0cShift-JIS
+codeset.19.iana=Shift_JIS
+
+codeset.20.label=\u97e9\u56fd\u8bed\uff0cEUC-KR
+codeset.20.iana=EUC-KR
+
+codeset.21.label=\u97e9\u56fd\u8bed\uff0cISO 2022
+codeset.21.iana=ISO-2022-KR
+
+codeset.22.label=\u6cf0\u56fd\u8bed\uff0cTISI
+codeset.22.iana=TIS-620
+
+codeset.23.label=\u571f\u8033\u5176\u8bed
+codeset.23.iana=ISO-8859-9
diff --git a/bundles/org.eclipse.wst.sse.core/config/charset_zh_TW.properties b/bundles/org.eclipse.wst.sse.core/config/charset_zh_TW.properties
new file mode 100644
index 0000000..2fcb80e
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/config/charset_zh_TW.properties
@@ -0,0 +1,83 @@
+! the number of codeset names
+totalnumber=24
+
+! Only translate the xx.label strings. Do not translate the xx.iana or xx.java strings.
+
+codeset.0.label=ISO 10646/Unicode(UTF-8)
+codeset.0.iana=UTF-8
+
+codeset.1.label=ISO 10646/Unicode(UTF-16) \u5927\u5e8f\u6392\u5217\u6cd5
+codeset.1.iana=UTF-16
+
+codeset.2.label=ISO 10646/Unicode(UTF-16BE) \u5927\u5e8f\u6392\u5217\u6cd5
+codeset.2.iana=UTF-16BE
+
+codeset.3.label=ISO 10646/Unicode(UTF-16LE) \u5c0f\u5e8f\u6392\u5217\u6cd5
+codeset.3.iana=UTF-16LE
+
+codeset.4.label=US ASCII
+codeset.4.iana=US-ASCII
+
+codeset.5.label=ISO Latin-1
+codeset.5.iana=ISO-8859-1
+
+! (Slavic: Czech, Croat, German, Hungarian, Polish, Romanian, Slovak, Slovenian)
+codeset.6.label=\u4e2d\u6b50/\u6771\u6b50\uff08\u65af\u62c9\u592b\u6587\uff09
+codeset.6.iana=ISO-8859-2
+
+! (Esperanto, Galician, Maltese, Turkish)
+codeset.7.label=\u5357\u6b50
+codeset.7.iana=ISO-8859-3
+
+codeset.8.label=\u963f\u62c9\u4f2f\u6587\uff0c\u908f\u8f2f
+codeset.8.iana=ISO-8859-6
+
+codeset.9.label=\u963f\u62c9\u4f2f\u6587
+codeset.9.iana=windows-1256
+
+codeset.10.label=\u4e2d\u6587\uff0c\u570b\u969b\u6a19\u6e96
+codeset.10.iana=GB18030
+
+codeset.11.label=\u7e41\u9ad4\u4e2d\u6587\uff0cBig5
+codeset.11.iana=BIG5
+
+! (Estonian, Latvian, Lithuanian)
+codeset.12.label=\u65af\u62c9\u592b\u6587\uff0cISO-8859-4
+codeset.12.iana=ISO-8859-4
+
+! (Bulgarian, Byelorussian, Macedonian, Serbian, Ukrainian)
+codeset.13.label=\u65af\u62c9\u592b\u6587\uff0cISO-8859-5
+codeset.13.iana=ISO-8859-5
+
+codeset.14.label=\u5e0c\u81d8\u6587
+codeset.14.iana=ISO-8859-7
+
+codeset.15.label=\u5e0c\u4f2f\u4f86\u6587\uff0c\u8996\u89ba
+codeset.15.iana=ISO-8859-8
+
+! label=Hebrew, Logical
+! iana=ISO-8859-8-I
+
+codeset.16.label=\u5e0c\u4f2f\u4f86\u6587
+codeset.16.iana=windows-1255
+
+codeset.17.label=\u65e5\u6587\uff0cEUC-JP
+codeset.17.iana=EUC-JP
+
+codeset.18.label=\u65e5\u6587\uff0cISO 2022
+codeset.18.iana=ISO-2022-JP
+
+codeset.19.label=\u65e5\u6587\uff0cShift-JIS
+codeset.19.iana=Shift_JIS
+
+codeset.20.label=\u97d3\u6587\uff0cEUC-KR
+codeset.20.iana=EUC-KR
+
+codeset.21.label=\u97d3\u6587\uff0cISO 2022
+codeset.21.iana=ISO-2022-KR
+
+codeset.22.label=\u6cf0\u6587\uff0cTISI
+codeset.22.iana=TIS-620
+
+codeset.23.label=\u571f\u8033\u5176\u6587
+codeset.23.iana=ISO-8859-9
diff --git a/bundles/org.eclipse.wst.sse.core/config/defaultIANA.properties b/bundles/org.eclipse.wst.sse.core/config/defaultIANA.properties
new file mode 100644
index 0000000..94a4063
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/config/defaultIANA.properties
@@ -0,0 +1,17 @@
+# This file lists charset mappings between an encoding and its default
+# equivalent encoding listed in charset.properties
+# format: anyname=iana name listed in charset.properties
+
+# No translation necessary
+windows-1252=ISO-8859-1
+windows-932=Shift_JIS
+windows-31j=Shift_JIS
+x-euc-jp-linux=EUC-JP
+windows-949=EUC-KR
+x-windows-949=EUC-KR
+windows-950=BIG5
+x-windows-950=BIG5
+windows-936=GB18030
+x-mswin-936=GB18030
+GB2312=GB18030
+x-EUC-CN=GB18030
\ No newline at end of file
diff --git a/bundles/org.eclipse.wst.sse.core/config/override.properties b/bundles/org.eclipse.wst.sse.core/config/override.properties
new file mode 100644
index 0000000..06ad11b
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/config/override.properties
@@ -0,0 +1,21 @@
+###############################################################################
+# Copyright (c) 2001, 2004 IBM Corporation and others.
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# IBM Corporation - initial API and implementation
+# Jens Lukowski/Innoopract - initial renaming/restructuring
+#
+###############################################################################
+# This file lists charset mappings which supplement or override those
+# provided by the VM. There are few cases where this should be required.
+# Note: there is no integrity checking to see if the validCharsetName is indeed
+# valid for a given VM, so errors would only show up when an attempt is made
+# to use it.
+# format: anyname=validjavaCharsetName
+ISO-8859-8-I=ISO-8859-8
+X-SJIS=Shift_JIS
+X-EUC-JP=EUC-JP
diff --git a/bundles/org.eclipse.wst.sse.core/plugin.xml b/bundles/org.eclipse.wst.sse.core/plugin.xml
index 418820c..c4448f1 100644
--- a/bundles/org.eclipse.wst.sse.core/plugin.xml
+++ b/bundles/org.eclipse.wst.sse.core/plugin.xml
@@ -28,7 +28,6 @@
<import plugin="org.eclipse.wst.common.contentmodel"/>
<import plugin="org.eclipse.core.runtime"/>
- <import plugin="org.eclipse.wst.common.encoding"/>
<import plugin="org.eclipse.core.filebuffers"/>
<import plugin="org.eclipse.wst.xml.uriresolver"/>
</requires>
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/CodedIO.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/CodedIO.java
new file mode 100644
index 0000000..d30f6c9
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/CodedIO.java
@@ -0,0 +1,258 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.util.Properties;
+
+import org.eclipse.core.runtime.IPath;
+import org.eclipse.core.runtime.Path;
+import org.eclipse.core.runtime.Platform;
+import org.eclipse.core.runtime.content.IContentDescription;
+import org.eclipse.wst.sse.core.internal.encoding.util.Assert;
+import org.eclipse.wst.sse.core.internal.encoding.util.Logger;
+import org.osgi.framework.Bundle;
+
+
+public abstract class CodedIO {
+
+ private static final boolean DEBUG = false; // gates the extra integrity logging in createMemento
+
+ public static final int MAX_BUF_SIZE = 1024 * 2;
+
+ public static final int MAX_MARK_SIZE = MAX_BUF_SIZE;
+
+ public static final String NO_SPEC_DEFAULT = "NoSpecDefault"; //$NON-NLS-1$
+
+ private static Properties overridenCharsets = null;
+
+ /**
+ * Maps a detected charset name to the name that should actually be used,
+ * according to the explicit overrides returned by getOverridenCharsets().
+ * <p>
+ * There are two well known understood cases where the standard/default
+ * Java Mappings are not sufficient. (Thanks to Hirotaka Matsumoto for
+ * providing these two). I believe there are others that individual
+ * customers have requested to override on a case by case basis, but I've
+ * lost the details. TODO-future: document some of those use-cases.
+ * </p>
+ * <ul>
+ * <li>ISO-8859-8-I</li>
+ * <p>
+ * From the code conversion point of view, ISO-8859-8 and ISO-8859-8-I are
+ * the same. However, the representation in the browser is different
+ * (it is very hard to put into words, but obvious once seen). Many BiDi
+ * HTML/JSPs use ISO-8859-8-I in the META/page directive, so WSAD needs to
+ * support this encoding.
+ * </p>
+ * <li>X-SJIS</li>
+ * <p>
+ * Because Mosaic/Navigator 2.0 supported only X-SJIS/X-EUC-JP, lots of
+ * old HTML files used X-SJIS/X-EUC-JP so that the customers still want us
+ * to support this code conversion for HTML files.
+ * </p>
+ * </ul>
+ * Note: declared public, although clients should not normally need to
+ * access this information directly.
+ *
+ * @param detectedCharsetName
+ * @return the detectedCharsetName, if no overrides, otherwise the charset
+ * name that should be used instead of detectedCharsetName
+ */
+ static public String checkMappingOverrides(String detectedCharsetName) {
+ // This method MUST return what was passed in, if
+ // there are no
+ // overrides.
+ String result = detectedCharsetName;
+ String newResult = getOverridenCharsets().getProperty(detectedCharsetName);
+ if (newResult != null) {
+ result = newResult;
+ }
+ return result;
+ }
+
+ /**
+ * Builds a memento from already-detected values. A javaCharsetName that is
+ * illegal or not supported by this VM is recorded as the invalid encoding.
+ * Note: trace info still needs to be appended by the caller.
+ */
+ public static EncodingMemento createEncodingMemento(byte[] detectedBom, String javaCharsetName, String detectedCharsetName, String unSupportedName, String specDefaultEncoding, String reason) {
+ EncodingMemento result = new EncodingMemento();
+ result.setJavaCharsetName(javaCharsetName);
+ result.setDetectedCharsetName(detectedCharsetName);
+ // TODO: if detectedCharset and spec default is null, need to use
+ // "work bench based" defaults.
+ if (specDefaultEncoding == null)
+ result.setAppropriateDefault(NO_SPEC_DEFAULT);
+ else
+ result.setAppropriateDefault(specDefaultEncoding);
+ if (unSupportedName != null) {
+ result.setInvalidEncoding(unSupportedName);
+ }
+ // check if valid; a null name is left for the caller to resolve
+ try {
+ if (javaCharsetName != null && !Charset.isSupported(javaCharsetName))
+ result.setInvalidEncoding(javaCharsetName);
+ } catch (IllegalCharsetNameException e) {
+ result.setInvalidEncoding(javaCharsetName);
+ }
+ // check UTF83ByteBOMUsed and UnicodeStream
+ if (detectedBom != null) {
+ if (detectedBom.length == 2)
+ result.setUnicodeStream(true);
+ else if (detectedBom.length == 3)
+ result.setUTF83ByteBOMUsed(true);
+ }
+ return result;
+ }
+
+ /**
+ * Note: once this instance is created, trace info still needs to be
+ * appended by caller, depending on the context its created.
+ */
+ public static EncodingMemento createEncodingMemento(String detectedCharsetName) {
+ return createEncodingMemento(detectedCharsetName, null);
+ }
+
+ /**
+ * Note: once this instance is created, trace info still needs to be
+ * appended by caller, depending on the context its created.
+ */
+ public static EncodingMemento createEncodingMemento(String detectedCharsetName, String reason) {
+ return createEncodingMemento(detectedCharsetName, reason, null);
+ }
+
+ /**
+ * Creates a memento for detectedCharsetName, resolved to its Java charset
+ * name. Note: once this instance is created, trace info still needs to be
+ * appended by caller, depending on the context its created.
+ */
+ public static EncodingMemento createEncodingMemento(String detectedCharsetName, String reason, String specDefaultEncoding) {
+ EncodingMemento result = new EncodingMemento();
+ String javaCharset = getAppropriateJavaCharset(detectedCharsetName);
+ result.setJavaCharsetName(javaCharset);
+ result.setDetectedCharsetName(detectedCharsetName);
+ // TODO: if detectedCharset and spec default is
+ // null, need to use "work
+ // bench based" defaults.
+ if (specDefaultEncoding == null)
+ result.setAppropriateDefault(NO_SPEC_DEFAULT);
+ else
+ result.setAppropriateDefault(specDefaultEncoding);
+ // check if valid
+ try {
+ Charset.isSupported(javaCharset);
+ } catch (IllegalCharsetNameException e) {
+ result.setInvalidEncoding(javaCharset);
+ }
+
+ return result;
+ }
+
+ /**
+ * Resolves detectedCharsetName, after mapping overrides, to the canonical
+ * name of the Java charset. Does not return null: Charset.forName throws
+ * for illegal or unsupported names (callers needing a "save anyway" name
+ * should then use "appropriateDefault").
+ * @param detectedCharsetName name to resolve; asserted to be non-null
+ * @return the canonical Java charset name
+ */
+ public static String getAppropriateJavaCharset(String detectedCharsetName) {
+ // we don't allow null argument (or risk NPE or
+ // IllegalArgumentException later at several
+ // points).
+ Assert.isNotNull(detectedCharsetName);
+ String result = detectedCharsetName;
+ // 1. Check explicit mapping overrides from
+ // property file
+ result = CodedIO.checkMappingOverrides(detectedCharsetName);
+ // 2. Use the "canonical" name from JRE mappings
+ // Note: see Charset JavaDoc, the name you look one up
+ // with can be an alias,
+ // the name you get back is the "standard" name.
+ Charset javaCharset = null;
+ // Note: this will immediately throw
+ // "UnsupportedCharsetException" if it is
+ // invalid. Issue: Is it more client friendly to
+ // eat that exception and return null?
+ javaCharset = Charset.forName(result);
+ if (javaCharset != null) {
+ result = javaCharset.name();
+ }
+ return result;
+ }
+
+ /**
+ * @return the override table, loaded on first use from config/override.properties; left empty if that file is missing or unreadable.
+ */
+ private static Properties getOverridenCharsets() {
+ if (overridenCharsets == null) {
+ overridenCharsets = new Properties();
+ Bundle keyBundle = Platform.getBundle(ICodedResourcePlugin.ID);
+ IPath keyPath = new Path("config/override.properties"); //$NON-NLS-1$
+ URL location = Platform.find(keyBundle, keyPath);
+ if (location != null) {
+ try {
+ InputStream propertiesInputStream = location.openStream();
+ try {
+ overridenCharsets.load(propertiesInputStream);
+ } finally {
+ propertiesInputStream.close();
+ }
+ } catch (IOException e) {
+ // can't read: assume no overrides; the table already exists, so the load is not retried
+ }
+ }
+ }
+ return overridenCharsets;
+ }
+
+ /**
+ * This class need not be instantiated (though its subclasses can be).
+ */
+ protected CodedIO() {
+ super();
+ }
+
+ /** Creates an EncodingMemento from the charset, BOM and detected/unsupported charset properties of contentDescription. */
+ protected EncodingMemento createMemento(IContentDescription contentDescription) {
+ EncodingMemento result;
+ String appropriateDefault = contentDescription.getContentType().getDefaultCharset();
+ String detectedCharset = (String) contentDescription.getProperty(IContentDescriptionExtended.DETECTED_CHARSET);
+ String unSupportedCharset = (String) contentDescription.getProperty(IContentDescriptionExtended.UNSUPPORTED_CHARSET);
+ String javaCharset = contentDescription.getCharset();
+ // integrity checks for debugging
+ if (javaCharset == null) {
+ Logger.log(Logger.INFO_DEBUG, "charset equaled null!"); //$NON-NLS-1$
+ } else if (javaCharset.length() == 0) {
+ Logger.log(Logger.INFO_DEBUG, "charset equaled emptyString!"); //$NON-NLS-1$
+ }
+ byte[] BOM = (byte[]) contentDescription.getProperty(IContentDescription.BYTE_ORDER_MARK);
+ //result = (EncodingMemento)
+ // contentDescription.getProperty(IContentDescriptionExtended.ENCODING_MEMENTO);
+ result = createEncodingMemento(BOM, javaCharset, detectedCharset, unSupportedCharset, null, null);
+ if (!result.isValid()) {
+ result.setAppropriateDefault(appropriateDefault);
+ // integrity check for debugging "invalid" cases: the appropriate default we
+ // have should equal what's in the detected field (not sure this is always required)
+ if (DEBUG && appropriateDefault != null && !appropriateDefault.equals(detectedCharset)) {
+ Logger.log(Logger.INFO_DEBUG, "appropriate did not equal detected, as expected for invalid charset case"); //$NON-NLS-1$
+ }
+ }
+ return result;
+ }
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/CodedReaderCreator.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/CodedReaderCreator.java
new file mode 100644
index 0000000..c7b99c4
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/CodedReaderCreator.java
@@ -0,0 +1,511 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
+
+import org.eclipse.core.resources.IFile;
+import org.eclipse.core.runtime.CoreException;
+import org.eclipse.core.runtime.IProgressMonitor;
+import org.eclipse.core.runtime.IStatus;
+import org.eclipse.core.runtime.Platform;
+import org.eclipse.core.runtime.Status;
+import org.eclipse.core.runtime.content.IContentDescription;
+import org.eclipse.core.runtime.content.IContentTypeManager;
+import org.eclipse.core.runtime.jobs.Job;
+import org.eclipse.wst.sse.core.internal.encoding.util.Assert;
+import org.eclipse.wst.sse.core.internal.encoding.util.BufferedLimitedStream;
+import org.eclipse.wst.sse.core.internal.encoding.util.Logger;
+import org.eclipse.wst.sse.core.internal.encoding.util.NullInputStream;
+import org.eclipse.wst.sse.core.internal.encoding.util.UnicodeBOMEncodingDetector;
+import org.eclipse.wst.sse.core.internal.exceptions.UnsupportedCharsetExceptionWithDetail;
+
+
+/**
+ * The purpose of this class is to centralize analysis of a file to determine
+ * the most appropriate rules of decoding it. The intended use is to set the
+ * input, then get the reader for that input which will have its encoding set
+ * appropriately. Additionally, there is an EncodingMemento provided, which
+ * will be required, in some cases, to later determine the most appropriate
+ * form of encoded output.
+ */
+public class CodedReaderCreator extends CodedIO {
+
+
+ private boolean fClientSuppliedStream;
+
+
+ private EncodingMemento fEncodingMemento;
+
+ private EncodingRule fEncodingRule;
+
+ private String fFilename;
+
+ private IFile fIFile;
+
+
+ private InputStream fInputStream;
+
+ public CodedReaderCreator() {
+
+ super();
+ }
+
+ public CodedReaderCreator(IFile file) throws CoreException, IOException {
+
+ this();
+ set(file);
+ setEncodingRule(EncodingRule.CONTENT_BASED);
+ }
+
+ public CodedReaderCreator(IFile file, EncodingRule encodingRule) throws CoreException, IOException {
+
+ this();
+ set(file);
+ setEncodingRule(encodingRule);
+ }
+
+ public CodedReaderCreator(String filename, InputStream inputStream) {
+
+ this();
+ set(filename, inputStream);
+ setEncodingRule(EncodingRule.CONTENT_BASED);
+ }
+
+ public CodedReaderCreator(String filename, InputStream inputStream, EncodingRule encodingRule) {
+
+ this();
+ set(filename, inputStream);
+ setEncodingRule(encodingRule);
+ }
+
+ private EncodingMemento checkForEncodingInContents(InputStream limitedStream) throws CoreException, IOException {
+ EncodingMemento result = null;
+
+ // if encoding memento already set, then iFile must
+ // have been set, and no need to get again.
+ if (fEncodingMemento != null) {
+ result = fEncodingMemento;
+ } else {
+ if (fClientSuppliedStream) {
+ try {
+ limitedStream.reset();
+ IContentTypeManager contentTypeManager = Platform.getContentTypeManager();
+ IContentDescription contentDescription = contentTypeManager.getDescriptionFor(limitedStream, fFilename, IContentDescription.ALL);
+ if (contentDescription != null) {
+ fEncodingMemento = createMemento(contentDescription);
+ }
+ result = fEncodingMemento;
+ } finally {
+ limitedStream.reset();
+ }
+ } else {
+ //throw new IllegalStateException("unexpected state:
+ // encodingMemento was null but no input stream supplied by
+ // client"); //$NON-NLS-1$
+ result = null;
+ }
+ }
+
+ if (result != null && !result.isValid() && !forceDefault()) {
+ throw new UnsupportedCharsetExceptionWithDetail(result);
+ }
+
+ return result;
+ }
+
+ /**
+ * @param resettableLimitedStream
+ */
+ private EncodingMemento checkStreamForBOM(InputStream resettableLimitedStream) {
+ EncodingMemento result = null;
+ UnicodeBOMEncodingDetector unicodeBOMEncodingDetector = new UnicodeBOMEncodingDetector();
+ unicodeBOMEncodingDetector.set(resettableLimitedStream);
+ result = unicodeBOMEncodingDetector.getEncodingMemento();
+ return result;
+ }
+
+ /**
+ * Creates the memento from the content description of fIFile, describing
+ * the file contents when no description is already available.
+ * @throws CoreException rethrown if the contents cannot be obtained or described
+ * @throws IOException rethrown if the contents cannot be read
+ */
+ private EncodingMemento findMementoFromFileCase() throws CoreException, IOException {
+ EncodingMemento result = null;
+ IContentDescription contentDescription = null;
+ try {
+ // This method provides possible improved performance at the
+ // cost of sometimes returning null
+ if (fIFile.exists())
+ contentDescription = fIFile.getContentDescription();
+ } catch (CoreException e) {
+ // Assume if core exception occurs, we can still try more
+ // expensive discovery options.
+ Logger.logException(e);
+ }
+ if (contentDescription == null && fIFile.isAccessible()) {
+ InputStream contents = null;
+ try {
+ contents = fIFile.getContents();
+ contentDescription = Platform.getContentTypeManager().getDescriptionFor(contents, fIFile.getName(), IContentDescription.ALL);
+ } catch (CoreException e1) {
+ // Assume if core exception occurs, we can't really do much
+ // with determining encoding, etc.
+ Logger.logException(e1);
+ throw e1;
+ } catch (IOException e2) {
+ // We likely couldn't get the contents of the file, something
+ // is really wrong
+ Logger.logException(e2);
+ throw e2;
+ } finally {
+ // always release the stream, even when describing it failed
+ if (contents != null) {
+ try {
+ contents.close();
+ } catch (IOException e3) {
+ Logger.logException(e3);
+ }
+ }
+ }
+ }
+ if (contentDescription != null) {
+ result = createMemento(contentDescription);
+ }
+ return result;
+ }
+
+ /**
+ * The primary method which contains the highest level rules for how to
+ * decide appropriate decoding rules: 1. first check for unicode stream 2.
+ * then looked for encoding specified in content (according to the type of
+ * content that is it ... xml, html, jsp, etc. 3. then check for various
+ * settings: file settings first, if null check project settings, if null,
+ * check user preferences. 4. lastly (or, what is the last user
+ * preference) is to use "workbench defaults".
+ *
+ * @throws IOException
+ * @throws CoreException
+ */
+ private EncodingMemento findMementoFromStreamCase() throws CoreException, IOException {
+
+ EncodingMemento result = null;
+ InputStream resettableLimitedStream = null;
+ try {
+ resettableLimitedStream = getLimitedStream(getResettableStream());
+ if (resettableLimitedStream != null) {
+ // first check for unicode stream
+ result = checkStreamForBOM(resettableLimitedStream);
+ // if not that, then check contents
+ if (result == null) {
+ resettableLimitedStream.reset();
+ result = checkForEncodingInContents(resettableLimitedStream);
+ }
+
+ } else {
+ // stream null, may name's not.
+ if (fFilename != null) {
+ // filename not null
+ IContentTypeManager contentTypeManager = Platform.getContentTypeManager();
+ IContentDescription contentDescription = contentTypeManager.getDescriptionFor(new NullInputStream(), fFilename, IContentDescription.ALL);
+ if (contentDescription != null) {
+ result = createMemento(contentDescription);
+ }
+ }
+ }
+ } finally {
+ if (resettableLimitedStream != null) {
+ handleStreamClose(resettableLimitedStream);
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Tells whether the FORCE_DEFAULT encoding rule is currently in effect.
+ */
+ private boolean forceDefault() {
+ // an identity comparison is already false for a null rule
+ return fEncodingRule == EncodingRule.FORCE_DEFAULT;
+ }
+
+ /**
+ * Returns a buffered reader over the input, decoding with the charset named
+ * by the encoding memento (its appropriate default under FORCE_DEFAULT).
+ * A detected byte order mark is skipped before the reader is created.
+ */
+ public Reader getCodedReader() throws CoreException, IOException {
+
+ Reader result = null;
+ // we make a local copy of encoding memento so stream won't
+ // be accessed simultaneously.
+ EncodingMemento encodingMemento = getEncodingMemento();
+ Assert.isNotNull(encodingMemento, "Appears reader requested before file or stream set"); //$NON-NLS-1$
+ InputStream streamToReturn = getResettableStream();
+ streamToReturn.reset();
+ // the built in converters may not skip a leading byte order
+ // mark, so skip it here (2 bytes for a Unicode stream, 3 for the
+ // UTF-8 BOM) to leave stream transparently ready for client.
+ // see ... TODO look up bug number
+ if (encodingMemento.isUnicodeStream()) {
+ streamToReturn.skip(2);
+ } else if (encodingMemento.isUTF83ByteBOMUsed()) {
+ streamToReturn.skip(3);
+ }
+ String charsetName = encodingMemento.getJavaCharsetName();
+ if (charsetName == null) {
+ charsetName = encodingMemento.getDetectedCharsetName();
+ }
+ if (!encodingMemento.isValid() && !forceDefault()) {
+ throw new UnsupportedCharsetExceptionWithDetail(encodingMemento);
+ }
+
+ if (fEncodingRule == EncodingRule.FORCE_DEFAULT) {
+ charsetName = encodingMemento.getAppropriateDefault();
+ }
+ Charset charset = Charset.forName(charsetName);
+ CharsetDecoder charsetDecoder = charset.newDecoder();
+ if (fEncodingRule == EncodingRule.IGNORE_CONVERSION_ERROR) {
+ charsetDecoder.onMalformedInput(CodingErrorAction.REPLACE);
+ charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+ } else {
+ charsetDecoder.onMalformedInput(CodingErrorAction.REPORT);
+ charsetDecoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+ }
+ // more efficient to be buffered, and I know of no reason not to
+ // return that directly.
+ result = new BufferedReader(new InputStreamReader(streamToReturn, charsetDecoder));
+ return result;
+ }
+
+ public EncodingMemento getEncodingMemento() throws CoreException, IOException {
+ // figure out encoding memento from encoding strategy
+ if (fEncodingMemento == null) {
+ if (fClientSuppliedStream) {
+ fEncodingMemento = findMementoFromStreamCase();
+ } else if (fIFile != null) {
+ fEncodingMemento = findMementoFromFileCase();
+ }
+ }
+
+ // if encoding stratagy doesn't provide answer,
+ // then try file settings, project settings,
+ // user preferences, and
+ // finally workbench default.
+ //
+ if (fEncodingMemento == null || fEncodingMemento.getDetectedCharsetName() == null) {
+ fEncodingMemento = getEncodingMementoFromResourceAndPreference();
+ }
+
+ // use DefaultNameRules from NonContentBasedEncodingRules as the final
+ // default
+ if (fEncodingMemento == null) {
+ fEncodingMemento = handleNotProvidedFromContentCase();
+ }
+
+ return fEncodingMemento;
+ }
+
+ /*
+ * This method is called only when encoding is not detected in the file.
+ *
+ * Here is encoding lookup order we will try: - try resource content
+ * description (Eclipse Text file encoding) - try resource content
+ * properties (for JSP only) - try content type encoding preferences (for
+ * HTML only) - try resource content description (Eclipse Text file
+ * encoding, implicit check)
+ *
+ * Note: This method appears in both CodedReaderCreator and
+ * CodedStreamCreator (with just a minor difference). They should be kept
+ * the same.
+ */
+ private EncodingMemento getEncodingMementoFromResourceAndPreference() throws IOException, CoreException {
+ EncodingMemento encodingMemento = fEncodingMemento;
+
+ // Follow Eclipse Platform's direction. Get the charset from IFile.
+ if (fIFile != null) {
+ String charset = fIFile.getCharset();
+ encodingMemento = CodedIO.createEncodingMemento(charset);
+ }
+
+ return encodingMemento;
+ }
+
+ /**
+ * Ensures that an InputStream has mark/reset support, that its readlimit
+ * is set, and that the stream is "limitable" (that is, reports "end of
+ * input" rather than allow going past mark). This is a very specialized
+ * stream introduced to overcome
+ * https://bugs.eclipse.org/bugs/show_bug.cgi?id=67211. See also
+ * https://bugs.eclipse.org/bugs/show_bug.cgi?id=68565
+ */
+ private InputStream getLimitedStream(InputStream original) {
+ if (original == null)
+ return null;
+ if (original instanceof BufferedLimitedStream)
+ return original;
+ InputStream s = new BufferedLimitedStream(original, CodedIO.MAX_MARK_SIZE);
+ s.mark(CodedIO.MAX_MARK_SIZE);
+ return s;
+ }
+
+ private InputStream getResettableStream() throws CoreException, IOException {
+
+ InputStream resettableStream = null;
+
+ if (fIFile != null) {
+ InputStream inputStream = null;
+ try {
+ inputStream = fIFile.getContents();
+ } catch (CoreException e) {
+ // SHOULD actually check for existence of
+ // fIStorage, but
+ // for now will just assume core exception
+ // means it
+ // doesn't exist on file system, yet.
+ inputStream = new NullInputStream();
+ }
+ resettableStream = new BufferedInputStream(inputStream, CodedIO.MAX_BUF_SIZE);
+ } else {
+ if (fInputStream != null) {
+ if (fInputStream.markSupported()) {
+ resettableStream = fInputStream;
+ // try {
+ resettableStream.reset();
+ // }
+ // catch (IOException e) {
+ // // assumed just hasn't been marked yet, so ignore
+ // }
+ } else {
+ resettableStream = new BufferedInputStream(fInputStream, CodedIO.MAX_BUF_SIZE);
+ }
+ }
+ }
+
+ if (resettableStream == null) {
+ resettableStream = new NullInputStream();
+ }
+
+ // mark this once, stream at "zero" position
+ resettableStream.mark(MAX_MARK_SIZE);
+ return resettableStream;
+ }
+
+ private EncodingMemento handleNotProvidedFromContentCase() {
+
+ EncodingMemento result = null;
+ String specDefault = null;
+ // try {
+ // specDefault = getEncodingDetector().getSpecDefaultEncoding();
+ // }
+ // catch (CoreException e) {
+ // // If this exception occurs, assumes there is
+ // // no specDefault
+ // }
+ // catch (IOException e) {
+ // // If this exception occurs, assumes there is
+ // // no specDefault
+ // }
+ // finally {
+ // try {
+ // handleStreamClose(fEncodingDetectorStream);
+ // }
+ // catch (IOException e1) {
+ // // severe error, not much to do here
+ // }
+ // }
+ // this logic should be moved to 'detection' if not already
+ String charset = NonContentBasedEncodingRules.useDefaultNameRules(specDefault);
+ Assert.isNotNull(charset, "post condition failed"); //$NON-NLS-1$
+ result = CodedIO.createEncodingMemento(charset);
+ return result;
+ }
+
+ /**
+ * @param resettableInputStream
+ * @throws IOException
+ */
+ private void handleStreamClose(InputStream resettableInputStream) throws IOException {
+
+ if (resettableInputStream != null) {
+ if (fClientSuppliedStream) {
+ resettableInputStream.reset();
+ } else {
+
+ resettableInputStream.close();
+ }
+ }
+ }
+
+ // TODO We just copy the content properties encoding to current resource's
+ // encoding for now. May improve the UI later by setting an informational
+ // message and/or disable the content properties encoding field.
+ // TODO: remake private else remove
+ void migrateContentPropertiesEncoding(String encoding) throws CoreException {
+ final IFile file = fIFile;
+ final String charset = encoding;
+ // TODO: externalize string later
+ Job migrater = new Job("Migrate Charset") {
+ protected IStatus run(IProgressMonitor monitor) {
+ if (file != null) {
+ try {
+ file.setCharset(charset, null);
+ } catch (CoreException e) {
+ Logger.logException(e);
+ }
+ }
+ return Status.OK_STATUS;
+ }
+ };
+ migrater.setSystem(true);
+ migrater.schedule();
+ }
+
+ private void resetAll() {
+
+ fEncodingRule = null;
+ fIFile = null;
+ fFilename = null;
+ fInputStream = null;
+ fEncodingMemento = null;
+ fClientSuppliedStream = false;
+ }
+
+ public void set(IFile iFile) throws CoreException, IOException {
+ resetAll();
+ Assert.isNotNull(iFile);
+ fIFile = iFile;
+ }
+
+ public void set(String filename, InputStream inputStream) {
+
+ resetAll();
+ fFilename = filename;
+ fInputStream = inputStream;
+ fClientSuppliedStream = true;
+ }
+
+ public void setEncodingRule(EncodingRule encodingRule) {
+
+ fEncodingRule = encodingRule;
+ }
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/CodedStreamCreator.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/CodedStreamCreator.java
new file mode 100644
index 0000000..70f3a93
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/CodedStreamCreator.java
@@ -0,0 +1,508 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayOutputStream;
+import java.io.CharArrayReader;
+import java.io.IOException;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.Reader;
+import java.io.StringReader;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.UnmappableCharacterException;
+
+import org.eclipse.core.resources.IFile;
+import org.eclipse.core.runtime.CoreException;
+import org.eclipse.core.runtime.IProgressMonitor;
+import org.eclipse.core.runtime.IStatus;
+import org.eclipse.core.runtime.Platform;
+import org.eclipse.core.runtime.Status;
+import org.eclipse.core.runtime.content.IContentDescription;
+import org.eclipse.core.runtime.content.IContentTypeManager;
+import org.eclipse.core.runtime.jobs.Job;
+import org.eclipse.wst.sse.core.internal.encoding.util.Assert;
+import org.eclipse.wst.sse.core.internal.encoding.util.CodedResourcePlugin;
+import org.eclipse.wst.sse.core.internal.encoding.util.Logger;
+import org.eclipse.wst.sse.core.internal.exceptions.CharConversionErrorWithDetail;
+import org.eclipse.wst.sse.core.internal.exceptions.MalformedOutputExceptionWithDetail;
+import org.eclipse.wst.sse.core.internal.exceptions.UnsupportedCharsetExceptionWithDetail;
+
+
+public class CodedStreamCreator extends CodedIO {
+
+ // Initial capacity of the ByteArrayOutputStream created in
+ // getCodedByteArrayOutputStream(EncodingRule); the 32 bytes used by
+ // default by ByteArrayOutputStream is a little small.
+ private final static int INITIAL_BUFFER_SIZE = 1024 * 16;
+
+ // assertion message used by handleNotProvidedFromContentCase() when no
+ // default charset name can be found
+ private static final String PROGRAM_ERROR__FAILED_TO_FIND_ANY_CHARSET_ANYWHERE_ = "Program error: failed to find any charset anywhere!"; //$NON-NLS-1$
+
+ // Java charset names dump() compares against to decide which byte order
+ // mark, if any, to write
+ private static final String UTF_16BE_CHARSET_NAME = "UTF-16BE"; //$NON-NLS-1$
+ private static final String UTF_16LE_CHARSET_NAME = "UTF-16LE"; //$NON-NLS-1$
+ // private static final String UTF_16_CHARSET_NAME = "UTF-16";
+ // //$NON-NLS-1$
+
+ private static final String UTF_8_CHARSET_NAME = "UTF-8"; //$NON-NLS-1$
+
+ // marks that a reader is available for content based detection;
+ // checkForEncodingInContents() refuses to run without it
+ private boolean fClientSuppliedReader;
+
+ // future_TODO: this 'checkConversion' can be a little
+ // pricey for large
+ // files, could be a user preference, or something.
+ // private static final boolean checkConversion = true;
+
+ // the memento produced by analyze(); dump() encodes according to it
+ private EncodingMemento fCurrentEncodingMemento;
+
+ // memento derived from the content, cached by
+ // checkForEncodingInContents()
+ private EncodingMemento fEncodingMemento;
+
+ // name handed to the content type manager together with the reader
+ private String fFilename;
+
+ // true once analyze() has completed
+ private boolean fHasBeenAnalyzed;
+
+ // optional workspace file; when present its charset is consulted by
+ // getEncodingMementoFromResourceAndPreference()
+ private IFile fIFile;
+
+ // optional memento supplied through setPreviousEncodingMemento(); its
+ // UTF-8 BOM flag is carried over in getCurrentEncodingMemento()
+ private EncodingMemento fPreviousEncodingMemento;
+
+ // the reader providing the characters to encode
+ private Reader fReader;
+
+ // fReader itself, or a marked BufferedReader around it, created lazily
+ // by getResettableReader()
+ private Reader fResettableReader;
+ // byte order mark written for UTF-16BE
+ private byte[] UTF16BEBOM = new byte[]{(byte) 0xFE, (byte) 0xFF};
+
+ // byte order mark written for UTF-16LE
+ private byte[] UTF16LEBOM = new byte[]{(byte) 0xFF, (byte) 0xFE};
+ // three byte order mark optionally written for UTF-8
+ private byte[] UTF3BYTEBOM = new byte[]{(byte) 0xEF, (byte) 0xBB, (byte) 0xBF};
+
+ /**
+ * Creates an empty creator; one of the set(...) methods must be called
+ * before any output is requested.
+ */
+ public CodedStreamCreator() {
+ super();
+ }
+
+ /**
+ * Creates a creator that encodes the given characters.
+ *
+ * @param filename
+ *            name used when the content type of the text is looked up
+ * @param characterArray
+ *            the characters to encode
+ */
+ public CodedStreamCreator(String filename, char[] characterArray) {
+ super();
+ fFilename = filename;
+ fReader = new CharArrayReader(characterArray);
+ // without this flag checkForEncodingInContents() rejects the reader
+ // with an IllegalStateException
+ fClientSuppliedReader = true;
+ }
+
+ /**
+ * Creates a creator that encodes whatever the given reader delivers.
+ *
+ * @param filename
+ *            name used when the content type of the text is looked up
+ * @param reader
+ *            the source of the characters to encode
+ */
+ public CodedStreamCreator(String filename, Reader reader) {
+ super();
+ fFilename = filename;
+ fReader = reader;
+ // same state set(String, Reader) establishes
+ fClientSuppliedReader = true;
+ }
+
+ /**
+ * Creates a creator that encodes the given text.
+ *
+ * @param filename
+ *            name used when the content type of the text is looked up
+ * @param textString
+ *            the text to encode
+ */
+ public CodedStreamCreator(String filename, String textString) {
+ super();
+ fFilename = filename;
+ fReader = new StringReader(textString);
+ // same state set(String, String) establishes
+ fClientSuppliedReader = true;
+ }
+
+ /**
+ * Determines the encoding memento to use for output and stores it in
+ * fCurrentEncodingMemento. The steps, in order:
+ * <ol>
+ * <li>if nothing is known yet, ask checkForEncodingInContents()</li>
+ * <li>if that gave no memento, or one without a detected charset name,
+ * ask getEncodingMementoFromResourceAndPreference()</li>
+ * <li>if there still is no memento, fall back to the default name rules
+ * via handleNotProvidedFromContentCase()</li>
+ * </ol>
+ * The resettable reader is reset before each step and once more when the
+ * method exits, whether or not an exception was thrown.
+ */
+ private void analyze() throws CoreException, IOException {
+ Reader resettableReader = getResettableReader();
+ try {
+ if (fCurrentEncodingMemento == null) {
+ resettableReader.reset();
+ fCurrentEncodingMemento = checkForEncodingInContents();
+ }
+ // if encoding strategy doesn't provide answer,
+ // then try file settings, project settings,
+ // user preferences, and
+ // finally workbench default.
+ //
+ if (fCurrentEncodingMemento == null || fCurrentEncodingMemento.getDetectedCharsetName() == null) {
+ resettableReader.reset();
+ fCurrentEncodingMemento = getEncodingMementoFromResourceAndPreference();
+ }
+
+ // use DefaultNameRules from NonContentBasedEncodingRules as the
+ // final default. The guard looks at the memento this method is
+ // responsible for producing, so a result obtained above is never
+ // overwritten and a missing one is always replaced.
+ if (fCurrentEncodingMemento == null) {
+ handleNotProvidedFromContentCase();
+ }
+
+ fHasBeenAnalyzed = true;
+ } finally {
+ if (resettableReader != null) {
+ resettableReader.reset();
+ }
+ }
+ }
+
+ /**
+ * Walks the resettable reader character by character and asks an encoder
+ * for the memento's Java charset whether each character can be encoded.
+ * The first character that cannot be encoded raises a
+ * MalformedOutputExceptionWithDetail carrying its 1-based position,
+ * unless the rule is IGNORE_CONVERSION_ERROR, in which case the problem
+ * is only logged and the scan continues. If the scan reaches the end
+ * without raising that exception, a CharConversionErrorWithDetail is
+ * thrown, so this method never returns normally. The reader is reset
+ * before the scan and again when the method exits.
+ *
+ * @param memento
+ *            supplies the Java charset name and the detected name
+ * @param encodingRule
+ *            decides whether an unencodable character is fatal
+ * @throws IOException
+ *             always, see above, or when reading fails
+ * @deprecated - we need to find "cheaper" way to to this functionality so
+ *             likely to go away in future
+ */
+ private void checkConversion(EncodingMemento memento, EncodingRule encodingRule) throws IOException {
+ String javaName = memento.getJavaCharsetName();
+ String detectedName = memento.getDetectedCharsetName();
+ CharsetEncoder encoder = Charset.forName(javaName).newEncoder();
+ encoder.onMalformedInput(CodingErrorAction.REPORT);
+ encoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+ Reader source = getResettableReader();
+ source.reset();
+ int next = source.read();
+ try {
+ // note: this can probably be made more efficient later by
+ // checking buffer by buffer instead of character by character
+ for (int position = 1; next != -1; position++) {
+ try {
+ if (!encoder.canEncode((char) next)) {
+ if (encodingRule != EncodingRule.IGNORE_CONVERSION_ERROR) {
+ throw new MalformedOutputExceptionWithDetail(javaName, detectedName, position);
+ }
+ // told to ignore conversion errors: still detect and
+ // log them, but carry on with the save
+ Logger.log(Logger.ERROR, "Encoding Conversion Error during save"); //$NON-NLS-1$
+ }
+ next = source.read();
+ } catch (NullPointerException e) {
+ // IBM's JRE seems to throw NPE when DBCS char is given to
+ // SBCS charsetEncoder
+ throw new CharConversionErrorWithDetail(javaName); //$NON-NLS-1$
+ }
+ }
+ // the whole text passed the per-character check, so the error
+ // must be one that is not detectable character by character
+ throw new CharConversionErrorWithDetail(javaName); //$NON-NLS-1$
+ } finally {
+ source.reset();
+ }
+ }
+
+ /**
+ * Returns the encoding memento derived from the content. A memento cached
+ * in fEncodingMemento is returned as is. Otherwise the content type
+ * manager is asked to describe the reader's content; a description is
+ * turned into a memento via createMemento, while a missing description
+ * (or a NullPointerException during the lookup) yields a plain "UTF-8"
+ * memento. The result is cached in fEncodingMemento.
+ *
+ * @return the cached or newly created memento
+ * @throws IllegalStateException
+ *             if nothing is cached and no reader was flagged as supplied
+ */
+ private EncodingMemento checkForEncodingInContents() throws CoreException, IOException {
+ // already determined, no need to do it again
+ if (fEncodingMemento != null) {
+ return fEncodingMemento;
+ }
+ if (!fClientSuppliedReader) {
+ throw new IllegalStateException("unexpected state: encodingMemento was null but no input stream supplied"); //$NON-NLS-1$
+ }
+ fReader.reset();
+ IContentTypeManager typeManager = Platform.getContentTypeManager();
+ try {
+ IContentDescription description = typeManager.getDescriptionFor(fReader, fFilename, IContentDescription.ALL);
+ if (description == null) {
+ fEncodingMemento = CodedIO.createEncodingMemento("UTF-8"); //$NON-NLS-1$
+ } else {
+ fEncodingMemento = createMemento(description);
+ }
+ } catch (NullPointerException e) {
+ // TODO: work around for 5/14 bug in base, should be
+ // removed when move up to 5/21
+ // just created a simple default one
+ fEncodingMemento = CodedIO.createEncodingMemento("UTF-8"); //$NON-NLS-1$
+ }
+ // try {
+ // result = getEncodingDetector().getEncodingMemento();
+ // if (result != null && !result.isValid() && !forceDefault()) {
+ // throw new UnsupportedCharsetExceptionWithDetail(result);
+ // }
+ // }
+ // finally {
+ // handleStreamClose(fEncodingDetectorStream);
+ // }
+ return fEncodingMemento;
+ }
+
+
+ /**
+ * Encodes the complete content of the resettable reader and writes the
+ * bytes to the given stream. The charset comes from the current encoding
+ * memento, selected according to the encoding rule. For UTF-8 (when
+ * requested by the flag or by the memento), UTF-16LE and UTF-16BE a byte
+ * order mark is written first. The stream is flushed but never closed.
+ *
+ * @param outputStream
+ *            receives the encoded bytes; remains owned by the caller
+ * @param encodingRule
+ *            CONTENT_BASED requires a valid memento,
+ *            IGNORE_CONVERSION_ERROR replaces characters that cannot be
+ *            encoded, FORCE_DEFAULT uses the memento's appropriate
+ *            default
+ * @param use3ByteBOMifUTF8
+ *            whether to write the three byte BOM when the charset is
+ *            UTF-8
+ * @throws UnsupportedCharsetExceptionWithDetail
+ *             if the rule is CONTENT_BASED and the memento is not valid
+ */
+ private void dump(OutputStream outputStream, EncodingRule encodingRule, boolean use3ByteBOMifUTF8) throws CoreException, IOException {
+ // makes sure fCurrentEncodingMemento has been computed
+ getCurrentEncodingMemento();
+ String javaEncodingName = null;
+ if (encodingRule == EncodingRule.CONTENT_BASED) {
+ if (fCurrentEncodingMemento.isValid()) {
+ javaEncodingName = fCurrentEncodingMemento.getJavaCharsetName();
+ } else {
+ throw new UnsupportedCharsetExceptionWithDetail(fCurrentEncodingMemento);
+ }
+ } else if (encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR)
+ javaEncodingName = fCurrentEncodingMemento.getJavaCharsetName();
+ else if (encodingRule == EncodingRule.FORCE_DEFAULT)
+ javaEncodingName = fCurrentEncodingMemento.getAppropriateDefault();
+ // write appropriate "header" unicode BOM bytes
+ // Note: Java seems to write appropriate header for
+ // UTF-16, but not
+ // UTF-8 nor UTF-16BE. This
+ // may vary by JRE version, so need to test well.
+ // Note: javaEncodingName can be null in invalid
+ // cases, so we need
+ // to skip whole check if that's the case.
+ if (javaEncodingName != null) {
+ if ((javaEncodingName.equals(UTF_8_CHARSET_NAME) && use3ByteBOMifUTF8) || (javaEncodingName.equals(UTF_8_CHARSET_NAME) && fCurrentEncodingMemento.isUTF83ByteBOMUsed())) {
+ outputStream.write(UTF3BYTEBOM);
+ } else if (javaEncodingName.equals(UTF_16LE_CHARSET_NAME)) {
+ outputStream.write(UTF16LEBOM);
+ } else if (javaEncodingName.equals(UTF_16BE_CHARSET_NAME)) {
+ outputStream.write(UTF16BEBOM);
+ }
+ }
+ // TODO add back in line delimiter handling the
+ // "right" way (updating
+ // markers, not requiring string, etc. .. may need
+ // to move to document
+ // level)
+ //allTextBuffer =
+ // handleLineDelimiter(allTextBuffer, document);
+ Reader reader = getResettableReader();
+ // be sure to test large "readers" ... we'll need
+ // to make sure they all
+ // can reset to initial position (StringReader,
+ // CharArrayReader, and
+ // DocumentReader should all work ok).
+ reader.reset();
+ // There must be cleaner logic somehow, but the
+ // idea is that
+ // javaEncodingName can be null
+ // if original detected encoding is not valid (and
+ // if FORCE_DEFAULT was
+ // not specified). Hence, we WANT the first
+ // Charset.forName to
+ // throw appropriate exception.
+ Charset charset = null;
+
+ // this call checks "override" properties file
+ javaEncodingName = CodedIO.getAppropriateJavaCharset(javaEncodingName);
+
+ if (javaEncodingName == null) {
+ charset = Charset.forName(fCurrentEncodingMemento.getDetectedCharsetName());
+ } else {
+ charset = Charset.forName(javaEncodingName);
+ }
+ CharsetEncoder charsetEncoder = charset.newEncoder();
+ // report encoding problems as exceptions unless the caller asked to
+ // ignore them, in which case offending input is replaced
+ if (!(encodingRule == EncodingRule.IGNORE_CONVERSION_ERROR)) {
+ charsetEncoder.onMalformedInput(CodingErrorAction.REPORT);
+ charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPORT);
+ } else {
+ charsetEncoder.onMalformedInput(CodingErrorAction.REPLACE);
+ charsetEncoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+
+ }
+ OutputStreamWriter outputStreamWriter = new OutputStreamWriter(outputStream, charsetEncoder);
+ //TODO: this may no longer be needed (and is at
+ // least wrong spot for
+ // it).
+ // if (checkConversion && (!(encodingRule ==
+ // EncodingRule.IGNORE_CONVERSION_ERROR))) {
+ // checkConversion(fCurrentEncodingMemento,
+ // encodingRule);
+ // }
+ char[] charbuf = new char[CodedIO.MAX_BUF_SIZE];
+ int nRead = 0;
+ try {
+ // copy the reader to the writer one buffer at a time
+ while (nRead != -1) {
+ nRead = reader.read(charbuf, 0, MAX_BUF_SIZE);
+ if (nRead > 0) {
+ outputStreamWriter.flush();
+ outputStreamWriter.write(charbuf, 0, nRead);
+ }
+ }
+ } catch (UnmappableCharacterException e) {
+ // re-scan character by character; checkConversion ends by
+ // throwing a more detailed exception
+ checkConversion(fCurrentEncodingMemento, encodingRule);
+ } finally {
+ // since we don't own the original output stream, we
+ // won't close it ourselves.
+ // the caller who passed it to us must close original one
+ // when appropriate.
+ // (but we do flush to be sure all up-to-date)
+ outputStreamWriter.flush();
+ }
+ }
+
+ private boolean get3ByteBOMPreference() {
+ return CodedResourcePlugin.getDefault().getPluginPreferences().getBoolean(CommonEncodingPreferenceNames.USE_3BYTE_BOM_WITH_UTF8);
+ }
+
+ public ByteArrayOutputStream getCodedByteArrayOutputStream() throws CoreException, IOException {
+ return getCodedByteArrayOutputStream(EncodingRule.CONTENT_BASED);
+ }
+
+ public ByteArrayOutputStream getCodedByteArrayOutputStream(EncodingRule encodingRule) throws CoreException, IOException {
+ //Assert.isNotNull(fPreviousEncodingMemento,
+ // "previousEncodingMemento
+ // needs to be set first");
+ ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(INITIAL_BUFFER_SIZE);
+ dump(byteArrayOutputStream, encodingRule, get3ByteBOMPreference());
+ return byteArrayOutputStream;
+ }
+
+ public EncodingMemento getCurrentEncodingMemento() throws CoreException, IOException {
+ //Assert.isNotNull(fPreviousEncodingMemento,
+ // "previousEncodingMemento
+ // needs to be set first");
+ if (!fHasBeenAnalyzed) {
+ analyze();
+ }
+ // post condition
+ Assert.isNotNull(fCurrentEncodingMemento);
+ // be sure to carry over appropriate encoding
+ // "state" that may be
+ // relevent.
+ if (fPreviousEncodingMemento != null) {
+ fCurrentEncodingMemento.setUTF83ByteBOMUsed(fPreviousEncodingMemento.isUTF83ByteBOMUsed());
+ }
+ return fCurrentEncodingMemento;
+ }
+
+ /*
+ * Called when no encoding was detected in the content.
+ *
+ * As implemented here: when a workspace file is known, a memento is
+ * created from that file's charset (following the Eclipse Platform's
+ * direction); otherwise the cached content based memento is returned
+ * unchanged, which may be null.
+ *
+ * Note: This method appears in both CodedReaderCreator and
+ * CodedStreamCreator (with just a minor difference). They should be kept
+ * the same.
+ */
+ private EncodingMemento getEncodingMementoFromResourceAndPreference() throws IOException, CoreException {
+ if (fIFile == null) {
+ return fEncodingMemento;
+ }
+ String fileCharset = fIFile.getCharset();
+ return CodedIO.createEncodingMemento(fileCharset);
+ }
+
+ /**
+ * Returns a reader over the content that supports reset(). The reader is
+ * created on first use and cached: fReader itself when it supports
+ * marking, otherwise a BufferedReader around it that is marked
+ * immediately with MAX_MARK_SIZE.
+ *
+ * @return the cached resettable reader
+ */
+ private Reader getResettableReader() {
+ if (fResettableReader != null) {
+ return fResettableReader;
+ }
+ if (fReader.markSupported()) {
+ fResettableReader = fReader;
+ return fResettableReader;
+ }
+ fResettableReader = new BufferedReader(fReader);
+ try {
+ fResettableReader.mark(MAX_MARK_SIZE);
+ } catch (IOException e) {
+ // impossible, since a BufferedReader is always markable
+ throw new Error(e);
+ }
+ return fResettableReader;
+ }
+
+ /**
+ * Final fallback: obtains a charset name from the default name rules
+ * (no spec default is passed at present) and makes a memento for it the
+ * current encoding memento. Asserts that a name was found.
+ */
+ protected void handleNotProvidedFromContentCase() {
+ // move to "detectors" if not already
+ //specDefault = getEncodingDetector().getSpecDefaultEncoding();
+ String noSpecDefault = null;
+ String defaultCharset = NonContentBasedEncodingRules.useDefaultNameRules(noSpecDefault);
+ Assert.isNotNull(defaultCharset, PROGRAM_ERROR__FAILED_TO_FIND_ANY_CHARSET_ANYWHERE_);
+ fCurrentEncodingMemento = CodedIO.createEncodingMemento(defaultCharset);
+ }
+
+ // TODO We just copy the content properties encoding to current resource's
+ // encoding for now. May improve the UI later by setting an informational
+ // message and/or disable the content properties encoding field.
+ // TODO make private if needed, else remove
+ /**
+ * Applies the given encoding as the charset of the current file, first
+ * directly and then once more from a scheduled system job. Does nothing
+ * when no file is set. A CoreException from the direct call propagates;
+ * one raised inside the job is only logged.
+ *
+ * @param encoding
+ *            the charset name to store on the file
+ */
+ void migrateContentPropertiesEncoding(String encoding) throws CoreException {
+ final IFile target = fIFile;
+ final String newCharset = encoding;
+ // NOTE(review): the charset is set here and again by the job below;
+ // one of the two looks redundant - confirm which one is intended
+ if (target != null) {
+ target.setCharset(newCharset, null);
+ }
+ // TODO: externalize string later
+ Job migrationJob = new Job("Migrate Charset") {
+ protected IStatus run(IProgressMonitor monitor) {
+ if (target == null) {
+ return Status.OK_STATUS;
+ }
+ try {
+ target.setCharset(newCharset, null);
+ } catch (CoreException e) {
+ Logger.logException(e);
+ }
+ return Status.OK_STATUS;
+ }
+ };
+ migrationJob.setSystem(true);
+ migrationJob.schedule();
+
+ }
+
+ /**
+ * Clears all state that belongs to the previously configured input so
+ * that the next input is analyzed from scratch. The file reference
+ * (fIFile) is deliberately not touched here; callers that associate a
+ * file manage that field themselves.
+ */
+ private void resetAll() {
+ fFilename = null;
+ fReader = null;
+ // the cached wrapper belongs to the old reader; if it were kept,
+ // getResettableReader() would keep returning the old content
+ fResettableReader = null;
+ fPreviousEncodingMemento = null;
+ fCurrentEncodingMemento = null;
+ // a memento detected from the old content must not be reused by
+ // checkForEncodingInContents() for the new content
+ fEncodingMemento = null;
+ fHasBeenAnalyzed = false;
+ fClientSuppliedReader = false;
+ }
+
+ /**
+ * Configures the content to encode together with the workspace file it
+ * belongs to. The file's name is used as the filename and the file's
+ * charset becomes available as a fallback encoding.
+ *
+ * @param file
+ *            the workspace file, must not be null
+ * @param reader
+ *            the source of the characters to encode
+ */
+ public void set(IFile file, Reader reader) {
+ set(file.getName(), reader);
+ // assigned after the delegate call, which clears any earlier file
+ fIFile = file;
+ }
+
+ /**
+ * Configures the characters to encode. Behaves like
+ * {@link #set(String, Reader)} with a reader over the array, so the
+ * content is accepted for analysis just like text given as a String.
+ *
+ * @param filename
+ *            name used when the content type is looked up
+ * @param characterArray
+ *            the characters to encode
+ */
+ public void set(String filename, char[] characterArray) {
+ set(filename, new CharArrayReader(characterArray));
+ }
+
+ /**
+ * Configures the reader whose content is to be encoded, discarding all
+ * state of a previous input, including a previously associated file.
+ *
+ * @param filename
+ *            name used when the content type is looked up
+ * @param reader
+ *            the source of the characters to encode
+ */
+ public void set(String filename, Reader reader) {
+ resetAll();
+ // a file left over from an earlier set(IFile, Reader) must not
+ // influence the encoding chosen for unrelated content
+ fIFile = null;
+ fFilename = filename;
+ fReader = reader;
+ fClientSuppliedReader = true;
+ }
+
+ /**
+ * Configures the text to encode. Behaves like
+ * {@link #set(String, Reader)} with a reader over the string.
+ *
+ * @param filename
+ *            name used when the content type is looked up
+ * @param textString
+ *            the text to encode
+ */
+ public void set(String filename, String textString) {
+ set(filename, new StringReader(textString));
+ }
+
+ /**
+ * Remembers the memento of the previous content. Its UTF-8 three byte BOM
+ * flag is copied onto the current memento by getCurrentEncodingMemento().
+ *
+ * @param previousEncodingMemento
+ *            the earlier memento, may be null
+ */
+ public void setPreviousEncodingMemento(EncodingMemento previousEncodingMemento) {
+ this.fPreviousEncodingMemento = previousEncodingMemento;
+ }
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/CommonCharsetNames.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/CommonCharsetNames.java
new file mode 100644
index 0000000..df09696
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/CommonCharsetNames.java
@@ -0,0 +1,230 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.nio.charset.Charset;
+import java.nio.charset.IllegalCharsetNameException;
+import java.nio.charset.UnsupportedCharsetException;
+import java.util.ArrayList;
+import java.util.Hashtable;
+import java.util.Properties;
+
+import org.eclipse.core.runtime.IPath;
+import org.eclipse.core.runtime.Path;
+import org.eclipse.core.runtime.Platform;
+import org.eclipse.wst.sse.core.internal.encoding.util.CharsetResourceHandler;
+import org.osgi.framework.Bundle;
+
+
+/**
+ * CommonCharsetNames is a utility class to provide a central place to map some
+ * IANA charset name to a Java charset name. In 1.4 JRE's this mostly is a
+ * thin wrapper of existing Charset functionality. It does, however, allow
+ * some "overriding" of the built in JRE mappings in the event they are
+ * necessary. See CodedIO.checkMappingOverrides.
+ * <p>
+ * This class also provides some convenient human readable description for the
+ * charset names which can be used in user interfaces. The description is NL
+ * aware based on locale. The data is populated via the charset.properties
+ * file only once, based on static initialization of the hashtables.
+ * <p>
+ * The IANA tags are based on reference information found at the
+ * http://www.iana.org site. Specifically see
+ * http://www.iana.org/assignments/character-sets
+ */
+public final class CommonCharsetNames {
+
+ // lazily loaded from config/defaultIANA.properties by
+ // getDefaultIANAMappings()
+ private static Properties defaultIANAmappings = null;
+
+ // IANA names of the common charsets, in the order they are listed in the
+ // charset resource; filled by initHashTables()
+ private static ArrayList encodings = null;
+
+ // maps an IANA name to its display label; filled by initHashTables()
+ private static Hashtable supportedEncodingDisplayNames = null;
+
+ /**
+ * Returns the names of the commonly available encodings as a fresh
+ * array, suitable for populating a UI dialog or drop down. This is a
+ * subset of the charsets the VM supports (which can get into the
+ * hundreds); for the full set use
+ * <code>Charset.availableCharsets()</code>.
+ *
+ * @return String[] the common charset names
+ */
+ public static String[] getCommonCharsetNames() {
+ String[] names = new String[getEncodings().size()];
+ return (String[]) getEncodings().toArray(names);
+ }
+
+ /**
+ * Returns the default IANA mappings, loading them on first use from
+ * config/defaultIANA.properties in the coded resource plugin's bundle.
+ * If the bundle or the file cannot be found, or the file cannot be read,
+ * the mappings are simply empty (or partial) and no further attempt to
+ * load them is made.
+ *
+ * @return Returns the defaultIANAmappings, never null.
+ */
+ private static Properties getDefaultIANAMappings() {
+ if (defaultIANAmappings == null) {
+ Properties mappings = new Properties();
+ Bundle keyBundle = Platform.getBundle(ICodedResourcePlugin.ID);
+ IPath keyPath = new Path("config/defaultIANA.properties"); //$NON-NLS-1$
+ // a missing bundle or file means there is nothing to load
+ URL location = (keyBundle == null) ? null : Platform.find(keyBundle, keyPath);
+ if (location != null) {
+ InputStream propertiesInputStream = null;
+ try {
+ propertiesInputStream = location.openStream();
+ mappings.load(propertiesInputStream);
+ } catch (IOException e) {
+ // if can't read, just assume there's no default IANA
+ // mappings; repeated attempts will not occur, since
+ // they will be represented by the Properties object
+ // stored below
+ } finally {
+ if (propertiesInputStream != null) {
+ try {
+ propertiesInputStream.close();
+ } catch (IOException e) {
+ // nothing useful can be done if close fails
+ }
+ }
+ }
+ }
+ defaultIANAmappings = mappings;
+ }
+ return defaultIANAmappings;
+ }
+
+ /**
+ * Returns display (translated) string for encoding name. If there is no
+ * "custom" translated version available, it defers to the VM's Charset
+ * support. It will return null if no display name is available, which
+ * includes names that are null, syntactically illegal, or not supported
+ * by the VM.
+ *
+ * @param charsetName
+ *            charset name, may be null
+ * @return Human friendly display name, or null
+ */
+ public static String getDisplayString(String charsetName) {
+ if (charsetName == null)
+ return null;
+ String result = (String) getSupportedEncodingDisplayNames().get(charsetName);
+ if (result == null) {
+ // if we don't have a special one, just return
+ // what's provided by Charset
+
+ try {
+ Charset charset = Charset.forName(charsetName);
+ result = charset.displayName();
+ } catch (IllegalCharsetNameException e) {
+ // a malformed name has no display name either
+ } catch (UnsupportedCharsetException e) {
+ // if not supported, the display name is
+ // the least of clients concerns :)
+ }
+ }
+ return result;
+ }
+
+ /**
+ * @return Returns the list of common encoding names, initializing the
+ *         tables first when that has not happened yet.
+ */
+ private static ArrayList getEncodings() {
+ if (encodings != null) {
+ return encodings;
+ }
+ initHashTables();
+ return encodings;
+ }
+
+ /**
+ * Returns the canonical name the VM reports for the given charset name
+ * or alias. When the name is illegal or not supported by the VM, the
+ * input is returned unchanged.
+ *
+ * @param charsetName
+ *            a charset name or alias
+ * @return the canonical charset name, or the input
+ */
+ public static String getIanaPreferredCharsetName(String charsetName) {
+ try {
+ String canonicalName = Charset.forName(charsetName).name();
+ return (canonicalName == null) ? charsetName : canonicalName;
+ } catch (IllegalCharsetNameException e) {
+ // just return input if illegal
+ return charsetName;
+ } catch (UnsupportedCharsetException e) {
+ // just return input if unsupported
+ return charsetName;
+ }
+ }
+
+ /**
+ * Returns a default IANA name that is listed in CommonCharsetNames. Here
+ * is how it checks: 1. check to see if charsetName is in the
+ * CommonCharsetNames list and if so, just return it. 2. check to see if
+ * charsetName is listed in defaultIANAmappings which contains a mapping
+ * of more common encodings and the default IANA name they should map to.
+ * 3. return defaultIanaName if all else fails
+ */
+ public static String getPreferredDefaultIanaName(String charsetName, String defaultIanaName) {
+ String preferredName = defaultIanaName;
+ String guessedName = charsetName;
+ try {
+ guessedName = CodedIO.getAppropriateJavaCharset(charsetName);
+ } catch (IllegalCharsetNameException e) {
+ // just ignore if illegal
+ } catch (UnsupportedCharsetException e) {
+ // just ignore if illegal
+ }
+ if (getEncodings().contains(guessedName))
+ preferredName = guessedName;
+ else {
+ preferredName = getDefaultIANAMappings().getProperty(guessedName, preferredName);
+ }
+
+ return preferredName;
+ }
+
+ /**
+ * @return the table mapping IANA names to display labels, initializing
+ *         the tables first when that has not happened yet
+ */
+ private static Hashtable getSupportedEncodingDisplayNames() {
+ if (supportedEncodingDisplayNames != null) {
+ return supportedEncodingDisplayNames;
+ }
+ initHashTables();
+ return supportedEncodingDisplayNames;
+ }
+
+ /**
+ * Fills the list of encodings and the display name table from the charset
+ * resource, unless the display name table already exists. The resource
+ * provides "totalnumber" and, for each index below it, a
+ * "codeset.N.iana" and a "codeset.N.label" entry. An empty or
+ * non-numeric count is treated as zero.
+ */
+ private static void initHashTables() {
+ if (supportedEncodingDisplayNames != null) {
+ // already initialized
+ return;
+ }
+ supportedEncodingDisplayNames = new Hashtable();
+ encodings = new ArrayList();
+
+ String countText = CharsetResourceHandler.getString("totalnumber");//$NON-NLS-1$
+ int codesetCount = 0;
+ if (countText.length() != 0) {
+ try {
+ codesetCount = Integer.parseInt(countText);
+ } catch (NumberFormatException e) {
+ codesetCount = 0;
+ }
+ }
+
+ for (int index = 0; index < codesetCount; index++) {
+ String keyPrefix = "codeset." + index; //$NON-NLS-1$
+ String iana = CharsetResourceHandler.getString(keyPrefix + ".iana");//$NON-NLS-1$
+ String displayName = CharsetResourceHandler.getString(keyPrefix + ".label");//$NON-NLS-1$
+
+ encodings.add(iana);
+ supportedEncodingDisplayNames.put(iana, displayName);
+ }
+ }
+
+ public static void main(String[] args) {
+ // unit test only
+ String test = "Cp1252"; //$NON-NLS-1$
+ String result = CommonCharsetNames.getIanaPreferredCharsetName(test);
+ System.out.println(test + " --> " + result); //$NON-NLS-1$
+
+ test = "MS932"; //$NON-NLS-1$
+ result = CommonCharsetNames.getIanaPreferredCharsetName(test);
+ System.out.println(test + " --> " + result); //$NON-NLS-1$
+
+ }
+
+ /**
+ * Creates an instance and makes sure the static tables are initialized.
+ * All functionality of this class is reachable through static methods.
+ */
+ public CommonCharsetNames() {
+ initHashTables();
+ }
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/CommonEncodingPreferenceNames.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/CommonEncodingPreferenceNames.java
new file mode 100644
index 0000000..0726665
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/CommonEncodingPreferenceNames.java
@@ -0,0 +1,68 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding;
+
+/**
+ * Shared constants for encoding and end-of-line preferences: the preference
+ * keys, the symbolic values naming a line delimiter, and the literal
+ * delimiter strings.
+ */
+public class CommonEncodingPreferenceNames {
+
+ // ---- preference keys ----
+
+ /**
+ * Key: the character code to use when reading a file.
+ */
+ public static final String INPUT_CODESET = "inputCodeset";//$NON-NLS-1$
+ /**
+ * Key: the character code to use when writing a file.
+ */
+ public static final String OUTPUT_CODESET = "outputCodeset";//$NON-NLS-1$
+ /**
+ * Key: the end-of-line character(s) to use.
+ */
+ public static final String END_OF_LINE_CODE = "endOfLineCode";//$NON-NLS-1$
+ /**
+ * Key: use the 3 byte BOM (Byte Order Mark) when saving UTF-8 encoded
+ * files.
+ */
+ public static final String USE_3BYTE_BOM_WITH_UTF8 = "Use3ByteBOMWithUTF8"; //$NON-NLS-1$
+
+ // ---- symbolic line delimiter values ----
+
+ /**
+ * Refers to the CR/MAC line delimiter.
+ */
+ public static final String CR = "EOL_Mac"; //$NON-NLS-1$
+ /**
+ * Refers to the CRLF/WINDOWS line delimiter.
+ */
+ public static final String CRLF = "EOL_Windows"; //$NON-NLS-1$
+ /**
+ * Refers to the LF/UNIX line delimiter.
+ */
+ public static final String LF = "EOL_Unix"; //$NON-NLS-1$
+ /**
+ * Refers to no translation of line delimiters.
+ */
+ public static final String NO_TRANSLATION = ""; //$NON-NLS-1$
+
+ // ---- literal line delimiters ----
+
+ /**
+ * The CR/MAC line delimiter itself.
+ */
+ public static final String STRING_CR = "\r";//$NON-NLS-1$
+ /**
+ * The CRLF/WINDOWS line delimiter itself.
+ */
+ public static final String STRING_CRLF = "\r\n";//$NON-NLS-1$
+ /**
+ * The LF/UNIX line delimiter itself.
+ */
+ public static final String STRING_LF = "\n";//$NON-NLS-1$
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/ContentBasedPreferenceGateway.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/ContentBasedPreferenceGateway.java
new file mode 100644
index 0000000..332a09d
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/ContentBasedPreferenceGateway.java
@@ -0,0 +1,165 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding;
+
+import org.eclipse.core.runtime.IPath;
+import org.eclipse.core.runtime.Platform;
+import org.eclipse.core.runtime.Plugin;
+import org.eclipse.core.runtime.content.IContentType;
+import org.eclipse.core.runtime.preferences.DefaultScope;
+import org.eclipse.core.runtime.preferences.IEclipsePreferences;
+import org.osgi.framework.Bundle;
+import org.osgi.service.prefs.Preferences;
+
+
+public class ContentBasedPreferenceGateway {
+ /** Preference location used when no contributing plugin can be inferred. */
+ private static final String DEFAULT_LOCATION = "org.eclipse.wst.sse.core"; //$NON-NLS-1$
+ /** Inferred plugin id whose preferences are redirected to SSE_XML_ID. */
+ private static final String RUNTIME_XML_ID = "org.eclipse.runtime.xml"; //$NON-NLS-1$
+ /** Location used in place of RUNTIME_XML_ID. */
+ private static final String SSE_XML_ID = "org.eclipse.wst.xml.core.xmlsource"; //$NON-NLS-1$
+
+ /**
+ * Checks whether a bundle with the given id is known to the platform.
+ *
+ * @param pluginId the bundle (plugin) id to look up
+ * @return true if Platform.getBundle returns a bundle for the id
+ */
+ private static boolean bundleExists(String pluginId) {
+ // this just verifies there's really a plugin with this ID in "stack"
+ Bundle bundle = Platform.getBundle(pluginId);
+ return bundle != null;
+ }
+
+ /**
+ * Determines which plugin's preference node holds the content type's settings.
+ *
+ * @param contentType the content type; when null, DEFAULT_LOCATION is
+ * used as the id to infer the plugin from
+ * @return the plugin id to use as preference node name
+ */
+ private static String getContributorPluginId(IContentType contentType) {
+ // TODO: need to have registration info here, but for now, we'll use
+ // simple heuristic to cover the cases we know about.
+ String fullId = (contentType == null) ? DEFAULT_LOCATION : contentType.getId();
+ String pluginId = inferPluginId(fullId);
+ // only one known case, so far, of hard coded re-direction
+ // (not sure this is even needed, but just in case).
+ // We don't want to store/change runtime.xml preferences
+ if (RUNTIME_XML_ID.equals(pluginId)) {
+ pluginId = SSE_XML_ID;
+ }
+ return pluginId;
+ }
+
+ /**
+ * Returns the default-scope preference node for the given content type.
+ *
+ * @param contentType the content type; may be null
+ * @return the contributing plugin's node below DefaultScope.SCOPE
+ */
+ private static Preferences getDefaultPreferences(IContentType contentType) {
+ return getScopedPreferences(DefaultScope.SCOPE, contentType);
+ }
+
+ /**
+ * Returns the plugin preference node for the given content type.
+ *
+ * @param contentType the content type; may be null
+ * @return the contributing plugin's node below Plugin.PLUGIN_PREFERENCE_SCOPE
+ */
+ public static Preferences getPreferences(IContentType contentType) {
+ return getScopedPreferences(Plugin.PLUGIN_PREFERENCE_SCOPE, contentType);
+ }
+
+ /**
+ * Returns the plugin preference node for the content type with the given id.
+ *
+ * @param contentTypeId id used to look up the content type
+ * @return the contributing plugin's node below Plugin.PLUGIN_PREFERENCE_SCOPE
+ */
+ public static Preferences getPreferences(String contentTypeId) {
+ IContentType contentType = Platform.getContentTypeManager().getContentType(contentTypeId);
+ return getPreferences(contentType);
+ }
+
+ /**
+ * Reads a preference value for the given content type, falling back to
+ * the value in the default scope.
+ *
+ * @param contentType the content type; may be null
+ * @param key the preference key
+ * @return the stored value, else the default-scope value, else null
+ */
+ public static String getPreferencesString(IContentType contentType, String key) {
+ Preferences preferences = getPreferences(contentType);
+ return preferences.get(key, getDefaultPreferences(contentType).get(key, null));
+ }
+
+ /**
+ * Same as getPreferencesString(IContentType, String), for the content type with the given id.
+ *
+ * @param contentTypeId id used to look up the content type
+ * @param key the preference key
+ * @return the stored value, else the default-scope value, else null
+ */
+ public static String getPreferencesString(String contentTypeId, String key) {
+ // resolve the content type once instead of once per scope
+ IContentType contentType = Platform.getContentTypeManager().getContentType(contentTypeId);
+ return getPreferencesString(contentType, key);
+ }
+
+ /**
+ * Returns the contributing plugin's preference node in the given scope.
+ *
+ * TODO: eventually need extension mechanism to avoid this hard coded
+ * lookup. The idea is to load/store based on plugin's preferences, where
+ * the content type was contributed. Eventually, too, we could do more
+ * "dynamic lookup" to get parent types for defaults, etc.
+ *
+ * @param scope first segment of the node path, e.g. DefaultScope.SCOPE
+ * @param contentType the content type; may be null
+ * @return the node at scope + IPath.SEPARATOR + pluginId below the root node
+ */
+ private static Preferences getScopedPreferences(String scope, IContentType contentType) {
+ IEclipsePreferences eclipsePreferences = Platform.getPreferencesService().getRootNode();
+ String pluginPreferenceLocation = scope + IPath.SEPARATOR + getContributorPluginId(contentType);
+ // Content type preferences are currently stored in the same place as
+ // plugin preferences, so no per-content-type child node is used.
+ return eclipsePreferences.node(pluginPreferenceLocation);
+ }
+
+ /**
+ * Infers a plugin id from a content type id by dropping its last segment.
+ *
+ * @param fullId the full (dot separated) content type id
+ * @return the inferred id if such a bundle exists, else DEFAULT_LOCATION
+ */
+ private static String inferPluginId(String fullId) {
+ // simply trim off last "segment" from full ID.
+ int lastSegmentPos = fullId.lastIndexOf('.');
+ String pluginId = null;
+ if (lastSegmentPos != -1) {
+ pluginId = fullId.substring(0, lastSegmentPos);
+ } else {
+ // weird case? We'll at least put/get them somewhere
+ pluginId = DEFAULT_LOCATION;
+ }
+ if (!bundleExists(pluginId)) {
+ // use default location
+ pluginId = DEFAULT_LOCATION;
+ }
+ return pluginId;
+ }
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/ContentTypeEncodingPreferences.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/ContentTypeEncodingPreferences.java
new file mode 100644
index 0000000..7ed07ba
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/ContentTypeEncodingPreferences.java
@@ -0,0 +1,163 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding;
+
+import org.eclipse.core.resources.ResourcesPlugin;
+import org.eclipse.wst.sse.core.internal.encoding.util.Assert;
+
+
+/**
+ * A convenience class to statically get preferences.
+ */
+
+public abstract class ContentTypeEncodingPreferences {
+
+ // actually a null/empty string also means use workbench default so this
+ // constant might not really be necessary
+ public static final String WORKBENCH_DEFAULT = "WORKBENCH_DEFAULT"; //$NON-NLS-1$
+
+ /** Returns null for a null or whitespace-only name, else the name itself. */
+ private static String blankToNull(String enc) {
+ return (enc == null || enc.trim().length() == 0) ? null : enc;
+ }
+
+ /** Returns the "file.encoding" system property, or null if unset or blank. */
+ private static final String getJavaPlatformDefaultEncoding() {
+ // note: its important to use this system property, instead of
+ // ByteToCharConverter.getDefault().getCharacterEncoding(), in order
+ // to handle changes "on the fly". The ByteToCharConverter default is
+ // apparently set only when the VM starts. There's not really any
+ // "customer scenarios" that change the default encoding "on the
+ // fly", but its at least used during our automated tests.
+ return blankToNull(System.getProperty("file.encoding")); //$NON-NLS-1$
+ }
+
+ /**
+ * Returns the line delimiter selected by the END_OF_LINE_CODE preference
+ * of the given content type.
+ *
+ * @param contentTypeId id of the content type whose preference is read
+ * @return "\r", "\n" or "\r\n"; null if the preference is not set or
+ * has any other value (such as NO_TRANSLATION)
+ */
+ public static final String getPreferredNewLineDelimiter(String contentTypeId) {
+ String newLineCode = ContentBasedPreferenceGateway.getPreferencesString(contentTypeId, CommonEncodingPreferenceNames.END_OF_LINE_CODE);
+ if (newLineCode == null) {
+ return null;
+ }
+ if (newLineCode.equals(CommonEncodingPreferenceNames.CR)) {
+ return CommonEncodingPreferenceNames.STRING_CR;
+ }
+ if (newLineCode.equals(CommonEncodingPreferenceNames.LF)) {
+ return CommonEncodingPreferenceNames.STRING_LF;
+ }
+ if (newLineCode.equals(CommonEncodingPreferenceNames.CRLF)) {
+ return CommonEncodingPreferenceNames.STRING_CRLF;
+ }
+ return null;
+ }
+
+ /**
+ * Returns current output encoding preference for contentTypeIdentifier
+ * (unique IANA encoding)
+ *
+ * @param contentTypeId id of the content type whose preference is read
+ * @return the OUTPUT_CODESET preference; if that is null, blank or
+ * WORKBENCH_DEFAULT, the workbench's preferred charset name
+ */
+ public static final String getUserPreferredCharsetName(String contentTypeId) {
+ String prefEncoding = ContentBasedPreferenceGateway.getPreferencesString(contentTypeId, CommonEncodingPreferenceNames.OUTPUT_CODESET);
+ // get workbench encoding preference if preference requests it
+ if (blankToNull(prefEncoding) == null || prefEncoding.equals(WORKBENCH_DEFAULT)) {
+ return getWorkbenchPreferredCharsetName();
+ }
+ return prefEncoding;
+ }
+
+ /**
+ * Utility method to get specified preference. Subclasses can't override,
+ * since we expect this to work in a consistent way. Note: this is
+ * specific to HTML and CSS and is intended to supply a "default spec"
+ * other than the workbench platform's default, only for those cases where
+ * there is no encoding specified anywhere else, e.g. in the file, or as a
+ * file or folder property.
+ *
+ * @return the input encoding preference of the HTML content type
+ */
+ public static final String getUserSpecifiedDefaultEncodingPreference() {
+ String ContentTypeID_HTML = "org.eclipse.wst.html.core.htmlsource"; //$NON-NLS-1$
+ return getUserSpecifiedDefaultEncodingPreference(ContentTypeID_HTML);
+ }
+
+ /**
+ * Returns the INPUT_CODESET preference of the given content type, falling
+ * back to the workbench's encoding preference.
+ *
+ * @param contentTypeID id of the content type whose preference is read
+ * @return the encoding name, or null if neither preference is set
+ */
+ public static final String getUserSpecifiedDefaultEncodingPreference(String contentTypeID) {
+ String enc = ContentBasedPreferenceGateway.getPreferencesString(contentTypeID, CommonEncodingPreferenceNames.INPUT_CODESET);
+ if (blankToNull(enc) == null) {
+ enc = getWorkbenchSpecifiedDefaultEncoding();
+ }
+ return blankToNull(enc);
+ }
+
+ /**
+ * Returns Workbench encoding preference. Note: if workbench encoding is
+ * null, platform encoding will be returned.
+ */
+ private static final String getWorkbenchPreferredCharsetName() {
+ String charset = ResourcesPlugin.getEncoding();
+ charset = CommonCharsetNames.getIanaPreferredCharsetName(charset);
+ return charset;
+ }
+
+ /**
+ * Returns Workbench encoding preference. Will return null if none
+ * specified.
+ */
+ private static final String getWorkbenchSpecifiedDefaultEncoding() {
+ ResourcesPlugin resourcePlugin = ResourcesPlugin.getPlugin();
+ String enc = resourcePlugin.getPluginPreferences().getString(ResourcesPlugin.PREF_ENCODING);
+ // return blank as null
+ return blankToNull(enc);
+ }
+
+ /**
+ * Chooses an encoding name by the default rules: the provider's spec
+ * default, else the user preference, else the workbench preference,
+ * else the Java platform default.
+ *
+ * @param encodingProvider supplies the spec default; may be null
+ * @return the chosen name as mapped by CodedIO.checkMappingOverrides
+ */
+ public static final String useDefaultNameRules(IResourceCharsetDetector encodingProvider) {
+ String enc = (encodingProvider == null) ? null : encodingProvider.getSpecDefaultEncoding();
+ // a non-null spec default always wins, even a blank one
+ if (enc == null) {
+ enc = getUserSpecifiedDefaultEncodingPreference();
+ if (blankToNull(enc) == null) {
+ enc = getWorkbenchSpecifiedDefaultEncoding();
+ }
+ if (blankToNull(enc) == null) {
+ // enc should never be null after this (but we'll check anyway)
+ enc = getJavaPlatformDefaultEncoding();
+ }
+ }
+ Assert.isNotNull(enc, "post condition invalid"); //$NON-NLS-1$
+ return CodedIO.checkMappingOverrides(enc);
+ }
+
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/EncodingHelper.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/EncodingHelper.java
new file mode 100644
index 0000000..6aeba07
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/EncodingHelper.java
@@ -0,0 +1,264 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.UnsupportedEncodingException;
+import java.util.StringTokenizer;
+
+
+/**
+ * EncodingHelper is used to determine the IANA tag / java encoding from the
+ * processing instruction. From this processing instruction: <?xml
+ * version="1.0" encoding="UTF-8"?>getEncodingTag() would return "UTF-8" and
+ * getEncoding() would return UTF8. The processing instruction is searched for
+ * via an input stream, or a byte array, depending on the constructor used.
+ *
+ * @deprecated - users of this class should move to base support: see
+ * IFile.getCharset (and related functions with BOM detection in
+ * IContentDescription).
+ *
+ */
+public class EncodingHelper {
+
+ protected static final int encodingNameSearchLimit = 1000;
+ protected static final org.eclipse.wst.sse.core.internal.encoding.util.SupportedJavaEncoding javaEncodingConverterHelper = new org.eclipse.wst.sse.core.internal.encoding.util.SupportedJavaEncoding();
+
+ /**
+ * @deprecated Encoding preferences are stored on a per content type
+ * basis. See the EncodingHelper.java class in b2butil for an
+ * example of how to extract the default encoding for a given
+ * content type.
+ */
+ static public String getDefaultEncoding() {
+ return javaEncodingConverterHelper.getJavaConverterName(getDefaultEncodingTag());
+ }
+
+ /**
+ * @deprecated Encoding preferences are stored on a per content type
+ * basis. See the EncodingHelper.java class in b2butil for an
+ * example of how to extract the default encoding for a given
+ * content type.
+ */
+ static public String getDefaultEncodingTag() {
+ return "UTF-8"; //$NON-NLS-1$
+ }
+
+ private String encodingName = null;
+ private String encodingTag;
+
+ /**
+ * Method EncodingHelper. Determine the encoding based on the byte array
+ * passed in.
+ *
+ * @param headerBytes
+ * buffer holding the start of the document
+ * @param length
+ * number of leading bytes of headerBytes to examine
+ */
+ public EncodingHelper(byte[] headerBytes, int length) {
+ determineEncoding(headerBytes, length);
+ }
+
+ /**
+ * Method EncodingHelper. Determine the encoding based on the input
+ * stream. Only the first 1000 bytes will be searched
+ *
+ * @param inStream
+ * stream to read the start of the document from
+ */
+ public EncodingHelper(InputStream inStream) {
+ determineEncoding(inStream);
+ }
+
+ /**
+ * Determines encodingName and encodingTag from a byte order mark and/or
+ * the XML declaration found in the first length bytes of headerBytes
+ * (length is clamped to the buffer). Needs at least 4 bytes.
+ */
+ private void determineEncoding(byte[] headerBytes, int length) {
+ // The buffer may be larger than the content read into it, so only
+ // the first 'length' bytes may be looked at.
+ int validLength = Math.max(0, Math.min(length, headerBytes.length));
+ try {
+ if (validLength >= 4) {
+ if ((headerBytes[0] == -2) && (headerBytes[1] == -1)) {
+ // Byte Order Mark == 0xFEFF: UTF-16, big-endian
+ encodingName = "UnicodeBig"; //$NON-NLS-1$
+ } else if ((headerBytes[0] == -1) && (headerBytes[1] == -2)) {
+ // Byte Order Mark == 0xFFFE: UTF-16, little-endian
+ encodingName = "UnicodeLittle"; //$NON-NLS-1$
+ } else if ((headerBytes[0] == -17) && (headerBytes[1] == -69) && (headerBytes[2] == -65)) {
+ // Byte Order Mark == 0xEF BB BF: UTF-8
+ encodingName = "UTF8"; //$NON-NLS-1$
+ }
+
+ // Now determine the encoding tag
+ encodingTag = getEncodingName(headerBytes, validLength);
+ if (encodingName == null) {
+ if (encodingTag != null) {
+ encodingName = javaEncodingConverterHelper.getJavaConverterName(encodingTag);
+ } else {
+ encodingName = "UTF8"; //$NON-NLS-1$
+ }
+ }
+
+ if (encodingTag == null || encodingTag.trim().equals("")) { //$NON-NLS-1$
+ encodingTag = javaEncodingConverterHelper.getIANAEncodingName(encodingName);
+ }
+ }
+ } catch (UnsupportedEncodingException ex) {
+ // Deliberately ignored; the fields keep whatever was determined
+ // before the failure. If this class weren't deprecated, this
+ // should not be ignored.
+ }
+ }
+
+ /**
+ * Reads up to encodingNameSearchLimit bytes from the stream and
+ * determines the encoding from them. An IOException while reading is
+ * ignored and leaves the encoding undetermined.
+ */
+ protected void determineEncoding(InputStream inStream) {
+ try {
+ // try and get at least first four bytes for auto encoding
+ // detection
+ byte[] headerBytes = getBytes(inStream, encodingNameSearchLimit);
+ determineEncoding(headerBytes, headerBytes.length);
+ } catch (IOException exception) {
+ // if exception, no bytes
+ }
+ }
+
+ /**
+ * Reads at most max bytes from the stream with a single read call.
+ *
+ * @return exactly the bytes read; an empty array at end of stream
+ */
+ private byte[] getBytes(InputStream inputStream, int max) throws IOException {
+ byte[] allHeaderBytes = new byte[max];
+ int nRead = inputStream.read(allHeaderBytes, 0, max);
+ if (nRead < 0) {
+ // read returns -1 at end of stream; without this the array
+ // allocation below fails with NegativeArraySizeException
+ nRead = 0;
+ }
+
+ byte[] headerBytes = null;
+ if (nRead != max) {
+ headerBytes = new byte[nRead];
+ System.arraycopy(allHeaderBytes, 0, headerBytes, 0, nRead);
+ } else {
+ headerBytes = allHeaderBytes;
+ }
+ return headerBytes;
+ }
+
+ /**
+ * <code>getEncoding</code> Retrieve the java converter name from the
+ * document.
+ *
+ * @return a <code>String</code> value for the java converter
+ */
+ public String getEncoding() {
+ return encodingName;
+ }
+
+ /**
+ * Use the encoding information in the document.
+ */
+ protected String getEncodingName(byte[] string, int length) throws UnsupportedEncodingException {
+ String enc = null;
+ enc = getEncodingNameInDocument(string, length);
+ return (enc);
+ }
+
+ /**
+ * Decodes the first length bytes of the buffer and searches them for an
+ * XML declaration with an encoding pseudo-attribute.
+ *
+ * @return the encoding value as written in the document, or null if
+ * none was found
+ */
+ protected String getEncodingNameInDocument(byte[] string, int length) throws UnsupportedEncodingException {
+ String encoding = null;
+ int validLength = Math.max(0, Math.min(length, string.length));
+ final String content;
+ if (encodingName != null) {
+ content = new String(string, 0, validLength, encodingName);
+ } else {
+ content = new String(string, 0, validLength); // use default Java encoder
+ }
+
+ String prologTag = "<?xml"; //$NON-NLS-1$
+
+ String encodingKeyword = "encoding"; //$NON-NLS-1$
+
+ int indexStart = content.indexOf(prologTag);
+ if (indexStart != -1) {
+ int indexEnd = content.indexOf(">", indexStart); //$NON-NLS-1$
+ if (indexEnd != -1) {
+ String prolog = content.substring(indexStart, indexEnd);
+ int encodingStart = prolog.indexOf(encodingKeyword);
+ if (encodingStart != -1) {
+ String encodingString = prolog.substring(encodingStart + encodingKeyword.length());
+ // the value is delimited by whichever quote comes first;
+ // a later pseudo-attribute may use the other kind
+ int apostrophePos = encodingString.indexOf('\'');
+ int quotePos = encodingString.indexOf('"');
+ boolean useApostrophe = apostrophePos != -1 && (quotePos == -1 || apostrophePos < quotePos);
+ String delimiter = useApostrophe ? "'" : "\""; //$NON-NLS-1$ //$NON-NLS-2$
+ StringTokenizer tokenizer = new StringTokenizer(encodingString, delimiter);
+ String equalSign = "="; //$NON-NLS-1$
+ while (tokenizer.hasMoreElements()) {
+ String currToken = tokenizer.nextToken();
+ if (currToken.trim().equals(equalSign)) {
+ if (tokenizer.hasMoreElements()) {
+ encoding = tokenizer.nextToken();
+ }
+ break;
+ }
+ }
+ }
+ }
+ }
+
+ if (encoding != null) {
+ final int len = encoding.length();
+ if (len > 1) { // a lone quote character cannot be unwrapped
+ if ((encoding.charAt(0) == '"') && (encoding.charAt(len - 1) == '"')) {
+ encoding = encoding.substring(1, len - 1);
+ } else if ((encoding.charAt(0) == '\'') && (encoding.charAt(len - 1) == '\'')) {
+ encoding = encoding.substring(1, len - 1);
+ }
+ }
+ }
+ return encoding;
+ }
+
+ /**
+ * <code>getEncodingTag</code> Retrieve the encoding tag from the
+ * document.
+ *
+ * @return a <code>String</code> value for the encoding tag
+ */
+ public String getEncodingTag() {
+ return encodingTag;
+ }
+
+ /** Returns true if oldEncoding is non-null and equals getEncoding(). */
+ public boolean isSameEncoding(String oldEncoding) {
+ return oldEncoding != null && oldEncoding.equals(getEncoding());
+ }
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/EncodingMemento.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/EncodingMemento.java
new file mode 100644
index 0000000..7c2fef0
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/EncodingMemento.java
@@ -0,0 +1,273 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding;
+
+import org.eclipse.core.runtime.content.IContentDescription;
+
+
+/**
+ * This class is to simply hold information and data about the type of
+ * encoding found for a resource. It not only includes names, etc., but also
+ * gives hints about the algorithm, or rule, by which the encoding was determined.
+ * Having all this info in a central object, associated with the Document
+ * (technically, IStructuredDocument), allows for better user error messages,
+ * and better handling of knowing how to dump a file, given we know how it was
+ * loaded.
+ *
+ * Note: the data in this class is only valid if its has actually gone through
+ * the loading or dumping sequence. It is not accurate, for example, if a
+ * structuredDocument is simply created and then setText called. In this type
+ * of case, accuracy for loading and dumping is not required, since its all
+ * re-discovered. One limitation is that structuredDocument's created "from
+ * scratch" this way, don't have any encoding information to count on, and
+ * would have to arrange the processing to be done. (And it is done,
+ * automatically if going through loader or dumper, but perhaps not in future
+ * new uses. TODO: this can be improved in future versions.)
+ *
+ * isInitialized is set when the loader or dumper processes have been used,
+ * but even this can't be counted on 100% if the document has been modified
+ * since.
+ *
+ */
+public class EncodingMemento implements Cloneable {
+
+ public final static String CLONED = "cloned"; //$NON-NLS-1$
+ public final static String DEFAULTS_ASSUMED_FOR_EMPTY_INPUT = "DefaultsAssumedForEmptyInput"; //$NON-NLS-1$
+ public final static String DEFAULTS_USED_DUE_TO_SMALL_STREAM = "defaultsUsedDueToSmallStream"; //$NON-NLS-1$
+
+
+ /*
+ * Strings to be used for tracing. TODO: need to clean this up, we no
+ * longer use all of them
+ */
+ public final static String DETECTED_STANDARD_UNICODE_BYTES = "detectedStandardUnicodeBytes"; //$NON-NLS-1$
+ public final static String FOUND_ENCODING_IN_CONTENT = "foundEncodingInContent"; //$NON-NLS-1$
+ public final static String FOUND_ENCODING_IN_STREAM = "foundEncodingInStream"; //$NON-NLS-1$
+ public final static String FOUND_ENCODING_IN_STRUCTURED_DOCUMENT = "foundEncodingInStructuredDocument"; //$NON-NLS-1$
+ public final static String GUESSED_ENCODING_FROM_STREAM = "GuessEncodingFromStream"; //$NON-NLS-1$
+ public final static String JAVA_NAME_FOUND_AS_IANA_NAME = "noMappingFoundButJavaNameFoundToBeIANAName"; //$NON-NLS-1$
+ public final static String JAVA_NAME_FOUND_IN_ALIAS_NAME = "noMappingFoundButJavaNameFoundInAliasTable"; //$NON-NLS-1$
+ public final static String NO_IANA_NAME_FOUND = "noMappingFoundFromJavaNameToIANAName"; //$NON-NLS-1$
+ public final static String USED_CONTENT_TYPE_DEFAULT = "UsedContentTypeDefault"; //$NON-NLS-1$
+ public final static String USED_JAVA_DEFAULT = "UsedJavaDefault"; //$NON-NLS-1$
+ public final static String USED_MEMENTO_FROM_LOAD = "usedMementoFromLoad"; //$NON-NLS-1$
+ public final static String USED_PROPERTY_SETTINGS = "USED_PROPERTY_SETTINGS"; //$NON-NLS-1$
+ public final static String USED_USER_SPECIFIED_PREFERENCE = "UsedUserSpecifiedPreference"; //$NON-NLS-1$
+ public final static String USED_WORKSPACE_DEFAULT = "UsedWorkspaceDefault"; //$NON-NLS-1$
+ public final static String USER_IS_USING_JAVA_ENCODING = "UserIsUsingJavaEncoding"; //$NON-NLS-1$
+ private String fAppropriateDefault;
+ private String fDetectedCharsetName;
+ private EncodingRule fEncodingRule;
+ private String fInvalidEncoding;
+ private boolean fIsInitialized;
+
+
+ private String fJavaCharsetName;
+ private boolean fUnicodeStream;
+ private boolean fUTF83ByteBOMUsed;
+
+ /**
+ * Returns a shallow copy of this memento, made by Object.clone.
+ * TODO: will change to bit-wise, once trace info is removed.
+ *
+ * @return the copy; never null
+ */
+ public Object clone() {
+ try {
+ return super.clone();
+ } catch (CloneNotSupportedException e) {
+ // impossible, since this class implements Cloneable; fail loudly rather than return null
+ throw new InternalError(e.getMessage());
+ }
+ }
+
+ /**
+ * Returns the appropriateDefault. This is only set if an invalid encoding
+ * was found, and contains a charset appropriate to use as a default
+ * value, if, for example, the user decides to load the document anyway,
+ * even though the charset was found to be invalid.
+ *
+ * @return the default; taken from the default name rules if none was set
+ */
+ public String getAppropriateDefault() {
+ if (fAppropriateDefault == null) {
+ fAppropriateDefault = NonContentBasedEncodingRules.useDefaultNameRules(null);
+ }
+ return fAppropriateDefault;
+ }
+
+ /**
+ * Returns the charset name, if it is different from the charset name
+ * found in getJavaCharsetName. This can happen, for example, if there are
+ * differences in case. This method might return SHIFT_JIS, and the
+ * getJavaCharsetName might return Shift_JIS -- if SHIFT_JIS was detected
+ * in file/document. If the original file contained the correct case, then
+ * this method would return null. The getJavaCharsetName is typically the
+ * one that should always be used, and this one only used for certain
+ * error conditions, or if, when creating a "duplicate" resource, it was
+ * desired to use exactly the charset name as in the original document. As
+ * an example of this later case, the original document might contain
+ * ISO-8859-9, but the detected charset name might contain ISO-8859-9-I.
+ *
+ * @return String
+ */
+ public String getDetectedCharsetName() {
+ return fDetectedCharsetName;
+ }
+
+ /**
+ * Returns the encodingRule in effect during last load or save
+ *
+ * @return EncodingRule
+ */
+ public EncodingRule getEncodingRule() {
+ return fEncodingRule;
+ }
+
+ /**
+ * Returns a charset name that was detected, but not found to be a charset
+ * supported by the VM.
+ *
+ * @return String
+ */
+ public String getInvalidEncoding() {
+ return fInvalidEncoding;
+ }
+
+ /**
+ * Returns the java canonical charset name.
+ *
+ * @return String
+ */
+ public String getJavaCharsetName() {
+ return fJavaCharsetName;
+ }
+
+ /**
+ * Returns the byte order mark matching the BOM/unicode flags and the
+ * charset name, or null. Note: we may be able to remove this method, if
+ * it turns out this work is done by "text" type.
+ *
+ * @deprecated -
+ */
+ public byte[] getUnicodeBOM() {
+ byte[] bom = null;
+ if (isUTF83ByteBOMUsed())
+ bom = IContentDescription.BOM_UTF_8;
+ else if (isUnicodeStream()) {
+ String charsetName = getJavaCharsetName(); // may be null if never set, so compare constant-first
+ if ("UTF-16".equals(charsetName) || "UTF-16LE".equals(charsetName)) { //$NON-NLS-1$ //$NON-NLS-2$
+ bom = IContentDescription.BOM_UTF_16LE; // NOTE(review): plain "UTF-16" maps to the LE BOM - confirm
+ } else if ("UTF-16BE".equals(charsetName)) { //$NON-NLS-1$
+ bom = IContentDescription.BOM_UTF_16BE;
+ }
+ }
+ return bom;
+ }
+
+ /**
+ * @deprecated - not sure if needed, or should at least be restricted
+ * access.
+ */
+ public boolean isInitialized() {
+ return fIsInitialized;
+ }
+
+ /**
+ * Note: in our implementation, the stream is a unicode stream if the
+ * charset is UTF-16, UTF-16LE, or UTF-16BE. A stream with 3 byte BOM is
+ * not considered unicode stream here.
+ *
+ * @return returns true if is a unicode (UTF-16) stream
+ */
+ public boolean isUnicodeStream() {
+ return fUnicodeStream;
+ }
+
+ /**
+ * Tells whether the 3 byte UTF-8 BOM was used. Note: a stream with the
+ * 3 byte BOM is not considered a unicode stream here, see
+ * isUnicodeStream().
+ *
+ * Set during load, can be used by dumper to write 3 byte BOM, which Java
+ * does not normally do. This helps maintain compatibility with other
+ * programs (those that wrote the 3 byte BOM there to begin with).
+ *
+ * @return true if the 3 byte UTF-8 BOM was used
+ */
+ public boolean isUTF83ByteBOMUsed() {
+ return fUTF83ByteBOMUsed;
+ }
+
+ public boolean isValid() {
+ return getInvalidEncoding() == null;
+ }
+
+ /**
+ * Sets the appropriateDefault.
+ *
+ * @param appropriateDefault
+ * The appropriateDefault to set
+ */
+ public void setAppropriateDefault(String appropriateDefault) {
+ fAppropriateDefault = appropriateDefault;
+ }
+
+
+ public void setDetectedCharsetName(String detectedCharsetName) {
+ fDetectedCharsetName = detectedCharsetName;
+ }
+
+ public void setEncodingRule(EncodingRule encodingRule) {
+ fEncodingRule = encodingRule;
+ }
+
+ public void setInvalidEncoding(String invalidEncoding) {
+ fInvalidEncoding = invalidEncoding;
+ }
+
+ /**
+ * @deprecated
+ */
+ public void setIsInitialized(boolean isInitialized) {
+ fIsInitialized = isInitialized;
+ }
+
+ /**
+ * Sets the javaCharsetName.
+ *
+ * @param javaCharsetName
+ * The javaCharsetName to set
+ */
+ public void setJavaCharsetName(String javaCharsetName) {
+ fJavaCharsetName = javaCharsetName;
+ }
+
+ /**
+ * @param unicodeStream the value isUnicodeStream() will return
+ */
+ public void setUnicodeStream(boolean unicodeStream) {
+ fUnicodeStream = unicodeStream;
+
+ }
+
+ /**
+ * Sets the uTF83ByteBOMUsed.
+ *
+ * @param uTF83ByteBOMUsed
+ * The uTF83ByteBOMUsed to set
+ */
+ public void setUTF83ByteBOMUsed(boolean uTF83ByteBOMUsed) {
+ fUTF83ByteBOMUsed = uTF83ByteBOMUsed;
+ }
+
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/EncodingRule.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/EncodingRule.java
new file mode 100644
index 0000000..53fbbe2
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/EncodingRule.java
@@ -0,0 +1,59 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding;
+
+
+/**
+ * Class to provide "enumerated types" for the encoding rule parameter. This is
+ * to be used by clients to have some control over how encoding is determined.
+ */
+public class EncodingRule {
+ /**
+ * CONTENT_BASED means the class which uses the parameter (such as
+ * contentType Loaders) should use what ever rules it normally would.
+ * (Note, some content type loaders may not always literally use the file
+ * content to determine encoding, but the point is they should use what
+ * ever rules they normally would.)
+ */
+ public static final EncodingRule CONTENT_BASED = new EncodingRule("CONTENT_BASED"); //$NON-NLS-1$
+ /**
+ * FORCE_DEFAULT means the class which uses the parameter (such as
+ * contentType Loaders) should use what ever it defines as the default
+ * encoding.
+ */
+ public static final EncodingRule FORCE_DEFAULT = new EncodingRule("FORCE_DEFAULT"); //$NON-NLS-1$
+
+ /**
+ * IGNORE_CONVERSION_ERROR means that the save operation should save even
+ * if it encounters conversion errors. This will result in some data loss,
+ * so should only be used after the user confirms that is indeed what they
+ * want to do.
+ */
+ public static final EncodingRule IGNORE_CONVERSION_ERROR = new EncodingRule("IGNORE_CONVERSION_ERROR"); //$NON-NLS-1$
+
+
+ private final String encodingRule;
+
+ /**
+ * Constructor for EncodingRule is private, so no one can instantiate
+ * except this class itself.
+ */
+ private EncodingRule(String ruleName) {
+ super();
+ encodingRule = ruleName;
+ }
+
+ public String toString() {
+ return encodingRule;
+ }
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/ICodedResourcePlugin.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/ICodedResourcePlugin.java
new file mode 100644
index 0000000..114f953
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/ICodedResourcePlugin.java
@@ -0,0 +1,19 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding;
+
+
+public interface ICodedResourcePlugin {
+ String ID = "org.eclipse.wst.sse.core.internal.encoding"; //$NON-NLS-1$
+
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/IContentDescriptionExtended.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/IContentDescriptionExtended.java
new file mode 100644
index 0000000..a778ad3
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/IContentDescriptionExtended.java
@@ -0,0 +1,42 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding;
+
+import org.eclipse.core.runtime.QualifiedName;
+
+
+ public interface IContentDescriptionExtended {
+ /**
+ * The APPROPRIATE_DEFAULT field is used only when the
+ * IContentType.getDefaultCharset returns null. It's typically set from
+ * user preferences. Known use cases are HTML and CSS, where there is no
+ * "spec default" for those content types.
+ */
+ public static final QualifiedName APPROPRIATE_DEFAULT = new QualifiedName(ICodedResourcePlugin.ID, "appropriateDefault"); //$NON-NLS-1$
+ /**
+ * The DETECTED_CHARSET property should be set when the "detected" charset
+ * is different from the java charset, even though functionally
+ * equivalent. This can occur, for example, when the cases are different,
+ * or when an alias name is used instead of the canonical name.
+ */
+ public final static QualifiedName DETECTED_CHARSET = new QualifiedName(ICodedResourcePlugin.ID, "detectedCharset"); //$NON-NLS-1$
+ /**
+ * The UNSUPPORTED_CHARSET property holds the charset value, if it's been
+ * found to be an unsupported charset. This is helpful in error messages,
+ * or in cases when even though the charset is invalid, the java charset
+ * is assumed to be the default.
+ */
+ public final static QualifiedName UNSUPPORTED_CHARSET = new QualifiedName(ICodedResourcePlugin.ID, "unsupportedCharset"); //$NON-NLS-1$
+
+
+ }
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/IResourceCharsetDetector.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/IResourceCharsetDetector.java
new file mode 100644
index 0000000..6995b51
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/IResourceCharsetDetector.java
@@ -0,0 +1,21 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding;
+
+import org.eclipse.core.resources.IStorage;
+import org.eclipse.core.runtime.CoreException;
+
+public interface IResourceCharsetDetector extends IStreamCharsetDetector {
+ void set(IStorage iStorage) throws CoreException;
+
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/IStreamCharsetDetector.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/IStreamCharsetDetector.java
new file mode 100644
index 0000000..0d8a5dd
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/IStreamCharsetDetector.java
@@ -0,0 +1,30 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+
+public interface IStreamCharsetDetector {
+ String getEncoding() throws IOException;
+
+ EncodingMemento getEncodingMemento() throws IOException;
+
+ String getSpecDefaultEncoding();
+
+ void set(InputStream inputStream);
+
+ void set(Reader reader);
+
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/NonContentBasedEncodingRules.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/NonContentBasedEncodingRules.java
new file mode 100644
index 0000000..ca4732d
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/NonContentBasedEncodingRules.java
@@ -0,0 +1,133 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding;
+
+import org.eclipse.core.resources.IFile;
+import org.eclipse.core.resources.ResourcesPlugin;
+import org.eclipse.core.runtime.CoreException;
+import org.eclipse.core.runtime.content.IContentType;
+
+import sun.io.ByteToCharConverter;
+
+public class NonContentBasedEncodingRules {
+
+ private static final String getJavaPlatformDefaultEncoding() {
+ // note: it's important to use this system property
+ // instead of
+ // ByteToCharConverter.getDefault().getCharacterEncoding()
+ // in order to handle changes "on the fly". The
+ // ByteToCharConverter default is apparently set only
+ // when the VM starts.
+ // There are not really any "customer scenarios"
+ // that change the default encoding "on the fly",
+ // but changing it is done during some automated
+ // tests.
+ // Fall back to the converter's default only when the
+ // property is missing or blank.
+ String enc = System.getProperty("file.encoding"); //$NON-NLS-1$
+ if (enc == null || enc.trim().length() == 0) {
+ enc = ByteToCharConverter.getDefault().getCharacterEncoding();
+ }
+ // return blank as null
+ if (enc != null && enc.trim().length() == 0) {
+ enc = null;
+ }
+ return enc;
+ }
+
+
+ public static String getUserSpecifiedDefaultForContentType(IFile iFile) {
+ // INPUT_CODESET preference for the file's content type, or null if none applies
+ String enc = null;
+ try {
+ // getContentDescription() may legitimately return null; treat that as "no preference"
+ org.eclipse.core.runtime.content.IContentDescription description = iFile.getContentDescription();
+ if (description != null) {
+ IContentType contentType = description.getContentType();
+ enc = ContentBasedPreferenceGateway.getPreferencesString(contentType, CommonEncodingPreferenceNames.INPUT_CODESET);
+ }
+ } catch (CoreException e) {
+ // if core exception occurs, assume no preference!
+ enc = null;
+ }
+ // return blank as null
+ return (enc != null && enc.trim().length() == 0) ? null : enc;
+ }
+
+ public static String getUserSpecifiedDefaultForContentType(String contentTypeId) {
+ String enc = ContentBasedPreferenceGateway.getPreferencesString(contentTypeId, CommonEncodingPreferenceNames.INPUT_CODESET);
+ // return blank as null
+ if (enc != null && enc.trim().length() == 0) {
+ enc = null;
+ }
+ return enc;
+ }
+
+ private static final String getWorkbenchSpecifiedDefaultEncoding() {
+ ResourcesPlugin resourcePlugin = ResourcesPlugin.getPlugin();
+ String enc = resourcePlugin.getPluginPreferences().getString(ResourcesPlugin.PREF_ENCODING);
+ // return blank as null
+ if (enc != null && enc.trim().length() == 0) {
+ enc = null;
+ }
+ return enc;
+ }
+
+ /**
+ * @param specDefault
+ * This is the default charset name that would ordinarily be
+ * used for a particular type of content. Null may be
+ * specififed for those types with no spec default. If the spec
+ * default is known (and passed in), then it will be returned
+ * after being checked to see if there's be any user specified
+ * "override" for that charset (which would be rare). In other
+ * words, if the spec is known, there's little reason to use
+ * this method.
+ * @return the charset that should be used according to the rules
+ * established by this class.
+ */
+ public static final String useDefaultNameRules(String specDefault) {
+ String enc = null;
+ String result = null;
+ enc = specDefault;
+ if (enc != null) {
+ result = enc;
+ } else {
+ enc = ContentTypeEncodingPreferences.getUserSpecifiedDefaultEncodingPreference();
+ if (enc != null && enc.trim().length() > 0) {
+ result = enc.trim();
+ } else {
+ if (enc == null || enc.trim().length() == 0) {
+ enc = getWorkbenchSpecifiedDefaultEncoding();
+ if (enc != null) {
+ result = enc.trim();
+ }
+ }
+ if (enc == null || enc.trim().length() == 0) {
+ enc = getJavaPlatformDefaultEncoding();
+ // enc should never be null (but we'll
+ // check anyway)
+ if (enc != null) {
+ result = enc;
+ }
+ }
+ }
+ }
+ result = CodedIO.checkMappingOverrides(result);
+ return result;
+ }
+
+ private NonContentBasedEncodingRules() {
+ super();
+ }
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/Assert.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/Assert.java
new file mode 100644
index 0000000..ac1ad3d
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/Assert.java
@@ -0,0 +1,164 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding.util;
+
+/**
+ * <code>Assert</code> is useful for embedding runtime sanity checks in
+ * code. The predicate methods all test a condition and throw some type of
+ * unchecked exception if the condition does not hold.
+ * <p>
+ * Assertion failure exceptions, like most runtime exceptions, are thrown when
+ * something is misbehaving. Assertion failures are invariably unspecified
+ * behavior; consequently, clients should never rely on these being thrown
+ * (and certainly should not be catching them specifically).
+ * </p>
+ */
+public final class Assert {
+
+ /**
+ * <code>AssertionFailedException</code> is a runtime exception thrown
+ * by some of the methods in <code>Assert</code>.
+ * <p>
+ * This class is not declared public to prevent some misuses; programs
+ * that catch or otherwise depend on assertion failures are susceptible to
+ * unexpected breakage when assertions in the code are added or removed.
+ * </p>
+ */
+ static class AssertionFailedException extends RuntimeException {
+ /**
+ * Comment for <code>serialVersionUID</code>
+ */
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Constructs a new exception.
+ */
+ public AssertionFailedException() {
+ super();
+ }
+
+ /**
+ * Constructs a new exception with the given message.
+ */
+ public AssertionFailedException(String detail) {
+ super(detail);
+ }
+ }
+
+ /**
+ * Asserts that an argument is legal. If the given boolean is not
+ * <code>true</code>, an <code>IllegalArgumentException</code> is
+ * thrown.
+ *
+ * @param expression
+ * the outcome of the check
+ * @return <code>true</code> if the check passes (does not return if the
+ * check fails)
+ * @exception IllegalArgumentException
+ * if the legality test failed
+ */
+ public static boolean isLegal(boolean expression) {
+ return isLegal(expression, ""); //$NON-NLS-1$
+ }
+
+ /**
+ * Asserts that an argument is legal. If the given boolean is not
+ * <code>true</code>, an <code>IllegalArgumentException</code> is
+ * thrown. The given message is included in that exception, to aid
+ * debugging.
+ *
+ * @param expression
+ * the outcome of the check
+ * @param message
+ * the message to include in the exception
+ * @return <code>true</code> if the check passes (does not return if the
+ * check fails)
+ * @exception IllegalArgumentException
+ * if the legality test failed
+ */
+ public static boolean isLegal(boolean expression, String message) {
+ if (!expression)
+ throw new IllegalArgumentException(message);
+ return expression;
+ }
+
+ /**
+ * Asserts that the given object is not <code>null</code>. If this is
+ * not the case, an unchecked <code>AssertionFailedException</code> is thrown.
+ *
+ * @param object
+ * the value to test
+ * @exception AssertionFailedException
+ * if the object is <code>null</code>
+ */
+ public static void isNotNull(Object object) {
+ isNotNull(object, ""); //$NON-NLS-1$
+ }
+
+ /**
+ * Asserts that the given object is not <code>null</code>. If this is
+ * not the case, the failure is logged and an unchecked exception carrying
+ * the given message is thrown, to aid debugging.
+ *
+ * @param object
+ * the value to test
+ * @param message
+ * the message to include in the log entry and the exception
+ * @exception AssertionFailedException
+ * if the object is <code>null</code>
+ */
+ public static void isNotNull(Object object, String message) {
+ if (object == null) {
+ Logger.log(Logger.ERROR, "null_argument: " + message); //$NON-NLS-1$
+ throw new AssertionFailedException("null_argument: " + message); //$NON-NLS-1$
+ }
+ }
+
+ /**
+ * Asserts that the given boolean is <code>true</code>. If this is not
+ * the case, some kind of unchecked exception is thrown.
+ *
+ * @param expression
+ * the outcome of the check
+ * @return <code>true</code> if the check passes (does not return if the
+ * check fails)
+ */
+ public static boolean isTrue(boolean expression) {
+ return isTrue(expression, ""); //$NON-NLS-1$
+ }
+
+ /**
+ * Asserts that the given boolean is <code>true</code>. If this is not
+ * the case, the failure is logged and an unchecked exception carrying the
+ * given message is thrown, to aid debugging.
+ *
+ * @param expression
+ * the outcome of the check
+ * @param message
+ * the message to include in the log entry and the exception
+ * @return <code>true</code> if the check passes (does not return if the
+ * check fails)
+ */
+ public static boolean isTrue(boolean expression, String message) {
+ if (!expression) {
+ Logger.log(Logger.ERROR, "assertion failed: " + message); //$NON-NLS-1$
+ throw new AssertionFailedException("assertion failed: " + message); //$NON-NLS-1$
+ }
+ return expression;
+ }
+
+ /* This class is not intended to be instantiated. */
+ private Assert() {
+ super();
+ }
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/BufferedLimitedStream.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/BufferedLimitedStream.java
new file mode 100644
index 0000000..7e20cb9
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/BufferedLimitedStream.java
@@ -0,0 +1,79 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding.util;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+public class BufferedLimitedStream extends BufferedInputStream {
+
+ private int limitedCount;
+
+ public BufferedLimitedStream(InputStream inStream, int size) {
+ super(inStream, size);
+ mark(size);
+ try {
+ limitedCount = Math.min(size, inStream.available());
+ } catch (IOException e) {
+ // unlikely
+ limitedCount = 0;
+ }
+ }
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see java.io.InputStream#available()
+ */
+ public synchronized int available() throws IOException {
+
+ return limitedCount - pos;
+ }
+
+ /**
+ * copied down from super class
+ */
+ private void ensureOpen() throws IOException {
+ if (in == null)
+ throw new IOException("Stream closed"); //$NON-NLS-1$
+ }
+
+ /**
+ * copied down from super class, then changed to simulate EOF if it goes
+ * beyond the buffered amount
+ */
+ public synchronized int read() throws IOException {
+ ensureOpen();
+ // for this special stream, indicate "end of file" when buffer is
+ // full
+ if (pos >= limitedCount) {
+ return -1;
+ }
+ return super.read();
+ }
+
+ /**
+ * Reads at most <code>len</code> bytes without ever going past the limit,
+ * so "end of file" is simulated at the limit even for oversized requests.
+ *
+ * @see java.io.InputStream#read(byte[], int, int)
+ */
+ public synchronized int read(byte[] b, int off, int len) throws IOException {
+ if (pos >= limitedCount) {
+ return -1;
+ }
+ // clamp: an unclamped super.read could refill the buffer and pass the limit
+ return super.read(b, off, Math.min(len, limitedCount - pos));
+ }
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/ByteReader.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/ByteReader.java
new file mode 100644
index 0000000..49215e0
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/ByteReader.java
@@ -0,0 +1,107 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+
+/**
+ * This is an "adapter" class, simply to get an input stream to act like a
+ * reader. We could not use InputStreamReader directly because its internal
+ * buffers are not controllable, and it sometimes pulls too much out of input
+ * stream (even when it wasn't needed for our purposes).
+ *
+ * The use of this class is highly specialized and by no means meant to be
+ * general purpose. Its use is restricted to those cases where the input
+ * stream can be regarded as ascii just long enough to determine what the real
+ * encoding should be.
+ */
+
+public class ByteReader extends Reader {
+
+ /** Default byte buffer size (2048). */
+ public static final int DEFAULT_BUFFER_SIZE = 2048;
+
+ protected byte[] fBuffer;
+
+ protected InputStream fInputStream;
+
+ protected ByteReader() {
+ super();
+ }
+
+ public ByteReader(InputStream inputStream) {
+ this(inputStream, DEFAULT_BUFFER_SIZE);
+ if (!inputStream.markSupported()) {
+ throw new IllegalArgumentException("ByteReader is required to have a resettable stream"); //$NON-NLS-1$
+ }
+ }
+
+ public ByteReader(InputStream inputStream, int size) {
+ this.fInputStream = inputStream;
+ if (!inputStream.markSupported()) {
+ throw new IllegalArgumentException("ByteReader is required to have a resettable stream"); //$NON-NLS-1$
+ }
+ this.fBuffer = new byte[size];
+
+ }
+
+ public void close() throws IOException {
+ this.fInputStream.close();
+ }
+
+ public void mark(int readAheadLimit) {
+ this.fInputStream.mark(readAheadLimit);
+ }
+
+ public boolean markSupported() {
+ return true;
+ }
+
+ public int read() throws IOException {
+ int b0 = this.fInputStream.read(); // -1 signals end of stream
+ return (b0 < 0) ? -1 : (b0 & 0x00FF); // masking -1 would turn EOF into char 255
+ }
+
+ public int read(char ch[], int offset, int length) throws IOException {
+ if (length > this.fBuffer.length) {
+ length = this.fBuffer.length;
+ }
+
+ int count = this.fInputStream.read(this.fBuffer, 0, length);
+
+ for (int i = 0; i < count; i++) {
+ int b0 = this.fBuffer[i];
+ // the 0x00FF is to "lose" the negative bits filled in the byte to
+ // int conversion
+ // (and which would be there if cast directly from byte to char).
+ char c0 = (char) (b0 & 0x00FF);
+ ch[offset + i] = c0;
+ }
+ return count;
+ }
+
+ public boolean ready() throws IOException {
+ return this.fInputStream.available() > 0;
+ }
+
+ public void reset() throws IOException {
+ this.fInputStream.reset();
+ }
+
+ public long skip(long n) throws IOException {
+ return this.fInputStream.skip(n);
+ }
+
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/CharsetResourceHandler.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/CharsetResourceHandler.java
new file mode 100644
index 0000000..2dec0d1
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/CharsetResourceHandler.java
@@ -0,0 +1,79 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding.util;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.text.MessageFormat;
+import java.util.MissingResourceException;
+import java.util.ResourceBundle;
+
+import org.eclipse.core.runtime.Platform;
+import org.eclipse.wst.sse.core.internal.encoding.ICodedResourcePlugin;
+
+
+public class CharsetResourceHandler {
+ private static ResourceBundle fgResourceBundle;
+
+ /**
+ * Returns the resource bundle used by all classes in this Project
+ */
+ public static ResourceBundle getResourceBundle() {
+ try {
+ // TODO: rework this in terms of Platform.getPlugin(descriptor)
+ // and Location
+ URL configURI = Platform.getBundle(ICodedResourcePlugin.ID).getEntry("/"); //$NON-NLS-1$
+ String configPathString = configURI + "config/charset"; //$NON-NLS-1$
+ return ResourceBundleHelper.getResourceBundle(configPathString);
+ }
+ catch (MissingResourceException e) {
+ Logger.logException("invalid install or configuration", e); //$NON-NLS-1$
+ }
+ catch (MalformedURLException e) {
+ Logger.logException("invalid install or configuration", e); //$NON-NLS-1$
+ }
+ catch (IOException e) {
+ Logger.logException("invalid install or configuration", e); //$NON-NLS-1$
+ }
+ return null;
+ }
+
+ public static String getString(String key) {
+ String result = null;
+ if (fgResourceBundle == null) {
+ fgResourceBundle = getResourceBundle();
+ }
+ if (fgResourceBundle != null) {
+ try {
+ result = fgResourceBundle.getString(key);
+ }
+ catch (MissingResourceException e) {
+ result = "!" + key + "!";//$NON-NLS-2$//$NON-NLS-1$
+ }
+ }
+ else {
+ result = "!" + key + "!";//$NON-NLS-2$//$NON-NLS-1$
+ }
+ return result;
+ }
+
+ public static String getString(String key, Object[] args) {
+ try {
+ return MessageFormat.format(getString(key), args);
+ }
+ catch (IllegalArgumentException e) {
+ return getString(key);
+ }
+ }
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/CodedResourcePlugin.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/CodedResourcePlugin.java
new file mode 100644
index 0000000..e5c948f
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/CodedResourcePlugin.java
@@ -0,0 +1,108 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding.util;
+
+import java.text.MessageFormat;
+import java.util.MissingResourceException;
+import java.util.ResourceBundle;
+
+import org.eclipse.core.resources.IWorkspace;
+import org.eclipse.core.resources.ResourcesPlugin;
+import org.eclipse.core.runtime.Plugin;
+import org.eclipse.core.runtime.Preferences;
+import org.eclipse.wst.sse.core.internal.encoding.CommonEncodingPreferenceNames;
+import org.eclipse.wst.sse.core.internal.encoding.ICodedResourcePlugin;
+
+/**
+ * The main plugin class to be used in the desktop.
+ */
+public class CodedResourcePlugin extends Plugin implements ICodedResourcePlugin {
+ //The shared instance.
+ private static CodedResourcePlugin plugin;
+ //Resource bundle.
+ private ResourceBundle resourceBundle;
+ private static final String KEY_PREFIX = "%"; //$NON-NLS-1$
+ private static final String KEY_DOUBLE_PREFIX = "%%"; //$NON-NLS-1$
+
+ /**
+ * Returns the shared instance.
+ */
+ public static CodedResourcePlugin getDefault() {
+ return plugin;
+ }
+
+ /**
+ * Returns the workspace instance.
+ */
+ public static IWorkspace getWorkspace() {
+ return ResourcesPlugin.getWorkspace();
+ }
+
+ /**
+ * The constructor.
+ */
+ public CodedResourcePlugin() {
+ super();
+ plugin = this;
+ }
+
+ protected void initializeDefaultPluginPreferences() {
+ Preferences prefs = getPluginPreferences();
+ prefs.setDefault(CommonEncodingPreferenceNames.USE_3BYTE_BOM_WITH_UTF8, false);
+ }
+
+ /**
+ * Returns the string from the plugin's resource bundle,
+ * or 'key' if not found.
+ */
+ public static String getResourceString(String value) {
+ String s = value.trim();
+ if (!s.startsWith(KEY_PREFIX, 0))
+ return s;
+ if (s.startsWith(KEY_DOUBLE_PREFIX, 0))
+ return s.substring(1);
+
+ int ix = s.indexOf(' ');
+ String key = ix == -1 ? s : s.substring(0, ix);
+
+ ResourceBundle bundle = getDefault().getResourceBundle();
+ try {
+ return (bundle != null) ? bundle.getString(key.substring(1)) : key;
+ } catch (MissingResourceException e) {
+ return key;
+ }
+ }
+
+ public static String getResourceString(String key, Object[] args) {
+
+ try {
+ return MessageFormat.format(getResourceString(key), args);
+ } catch (IllegalArgumentException e) {
+ return getResourceString(key);
+ }
+
+ }
+
+ /**
+ * Returns the plugin's resource bundle (the properties file that sits next to this class), or null if it can not be loaded.
+ */
+ public ResourceBundle getResourceBundle() {
+ try {
+ if (resourceBundle == null)
+ resourceBundle = ResourceBundle.getBundle("org.eclipse.wst.sse.core.internal.encoding.util.CommonEncodingPluginResources"); //$NON-NLS-1$
+ } catch (MissingResourceException x) {
+ resourceBundle = null;
+ }
+ return resourceBundle;
+ }
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/CommonEncodingPluginResources.properties b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/CommonEncodingPluginResources.properties
new file mode 100644
index 0000000..1d96a0e
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/CommonEncodingPluginResources.properties
@@ -0,0 +1,14 @@
+###############################################################################
+# Copyright (c) 2001, 2004 IBM Corporation and others.
+# All rights reserved. This program and the accompanying materials
+# are made available under the terms of the Eclipse Public License v1.0
+# which accompanies this distribution, and is available at
+# http://www.eclipse.org/legal/epl-v10.html
+#
+# Contributors:
+# IBM Corporation - initial API and implementation
+# Jens Lukowski/Innoopract - initial renaming/restructuring
+#
+###############################################################################
+EncodingPreferencePage.0=Encoding settings for Web and XML files:
+EncodingPreferencePage.1=Use 3 byte BOM (Byte Order Mark) when saving UTF-8 encoded files
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/Logger.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/Logger.java
new file mode 100644
index 0000000..1226c04
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/Logger.java
@@ -0,0 +1,158 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding.util;
+
+import java.util.StringTokenizer;
+
+import org.eclipse.core.runtime.IStatus;
+import org.eclipse.core.runtime.Platform;
+import org.eclipse.core.runtime.Status;
+import org.osgi.framework.Bundle;
+
+
+/**
+ * Small convenience class to log messages to this plugin's log. Messages are
+ * silently dropped unless PLUGIN_ID is the symbolic name of an installed
+ * bundle, so other plugins should make their own copy, with appropriate ID.
+ */
+public class Logger {
+ private static final String PLUGIN_ID = "org.eclipse.wst.sse.core"; //$NON-NLS-1$
+
+ public static final int ERROR = IStatus.ERROR; // 4
+ public static final int ERROR_DEBUG = 200 + ERROR;
+ public static final int INFO = IStatus.INFO; // 1
+ public static final int INFO_DEBUG = 200 + INFO;
+
+ public static final int OK = IStatus.OK; // 0
+
+ public static final int OK_DEBUG = 200 + OK;
+
+ private static final String TRACEFILTER_LOCATION = "/debug/tracefilter"; //$NON-NLS-1$
+ public static final int WARNING = IStatus.WARNING; // 2
+ public static final int WARNING_DEBUG = 200 + WARNING;
+
+ /**
+ * Adds message to log; the _DEBUG levels are logged only while the platform is debugging.
+ *
+ * @param level
+ * severity level of the message (OK, INFO, WARNING, ERROR,
+ * OK_DEBUG, INFO_DEBUG, WARNING_DEBUG, ERROR_DEBUG)
+ * @param message
+ * text to add to the log; null is logged as "null"
+ * @param exception
+ * exception thrown, may be null
+ */
+ protected static void _log(int level, String message, Throwable exception) {
+ if (level == OK_DEBUG || level == INFO_DEBUG || level == WARNING_DEBUG || level == ERROR_DEBUG) {
+ if (!isDebugging())
+ return;
+ }
+
+ // map the (possibly _DEBUG) level onto a plain IStatus severity; anything else stays OK
+ int severity = IStatus.OK;
+ switch (level) {
+ case INFO_DEBUG :
+ case INFO :
+ severity = IStatus.INFO;
+ break;
+ case WARNING_DEBUG :
+ case WARNING :
+ severity = IStatus.WARNING;
+ break;
+ case ERROR_DEBUG :
+ case ERROR :
+ severity = IStatus.ERROR;
+ }
+ message = (message != null) ? message : "null"; //$NON-NLS-1$
+ Status statusObj = new Status(severity, PLUGIN_ID, severity, message, exception);
+ Bundle bundle = Platform.getBundle(PLUGIN_ID);
+ if (bundle != null)
+ Platform.getLog(bundle).log(statusObj);
+ }
+
+ /**
+ * Prints message to log if category matches /debug/tracefilter option.
+ *
+ * @param category
+ * category of the message, compared with /debug/tracefilter
+ * @param message
+ * text to print; null is logged as "null"
+ * @param exception exception to attach, may be null
+ */
+ protected static void _trace(String category, String message, Throwable exception) {
+ if (isTracing(category)) {
+ message = (message != null) ? message : "null"; //$NON-NLS-1$
+ Status statusObj = new Status(IStatus.OK, PLUGIN_ID, IStatus.OK, message, exception);
+ Bundle bundle = Platform.getBundle(PLUGIN_ID);
+ if (bundle != null)
+ Platform.getLog(bundle).log(statusObj);
+ }
+ }
+
+ /**
+ * @return true if the platform is debugging
+ */
+ public static boolean isDebugging() {
+ return Platform.inDebugMode();
+ }
+
+ /**
+ * Determines if currently tracing a category
+ *
+ * @param category name to look for in the comma separated trace filter; null never matches
+ * @return true if tracing category, false otherwise
+ */
+ public static boolean isTracing(String category) {
+ if (!isDebugging())
+ return false;
+
+ String traceFilter = Platform.getDebugOption(PLUGIN_ID + TRACEFILTER_LOCATION);
+ if (traceFilter != null) {
+ StringTokenizer tokenizer = new StringTokenizer(traceFilter, ","); //$NON-NLS-1$
+ while (tokenizer.hasMoreTokens()) {
+ String cat = tokenizer.nextToken().trim();
+ if (cat.equals(category)) { // token is never null, so a null category is safe here
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
+ public static void log(int level, String message) {
+ _log(level, message, null);
+ }
+
+ public static void log(int level, String message, Throwable exception) {
+ _log(level, message, exception);
+ }
+
+ public static void logException(String message, Throwable exception) {
+ _log(ERROR, message, exception);
+ }
+
+ public static void logException(Throwable exception) {
+ _log(ERROR, (exception != null) ? exception.getMessage() : null, exception); // tolerate a null exception
+ }
+
+ public static void trace(String category, String message) {
+ _trace(category, message, null);
+ }
+
+ public static void traceException(String category, String message, Throwable exception) {
+ _trace(category, message, exception);
+ }
+
+ public static void traceException(String category, Throwable exception) {
+ _trace(category, (exception != null) ? exception.getMessage() : null, exception); // tolerate a null exception
+ }
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/NullInputStream.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/NullInputStream.java
new file mode 100644
index 0000000..43f3aa2
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/NullInputStream.java
@@ -0,0 +1,69 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding.util;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+
+public class NullInputStream extends InputStream {
+
+ /*
+ * Always at end of stream: there is never a byte to deliver.
+ *
+ * @see java.io.InputStream#read()
+ */
+ public int read() throws IOException {
+ final int endOfStream = -1;
+ return endOfStream;
+ }
+
+ /*
+ * Nothing can be skipped, so zero bytes are always reported as skipped.
+ *
+ * @see java.io.InputStream#skip(long)
+ */
+ public long skip(long n) throws IOException {
+ return 0L;
+ }
+
+ /*
+ * Reports mark support although there is nothing to remember: this
+ * Null class is used specifically as a "fake" resettable stream.
+ *
+ * @see java.io.InputStream#markSupported()
+ * @see java.io.InputStream#reset()
+ */
+ public boolean markSupported() {
+ return true;
+ }
+
+ /*
+ * Accepts the request without doing anything.
+ *
+ * @see java.io.InputStream#mark(int)
+ */
+ public synchronized void mark(int readlimit) {
+ // nothing to do
+ }
+
+ /*
+ * Always succeeds without doing anything.
+ *
+ * @see java.io.InputStream#reset()
+ */
+ public synchronized void reset() throws IOException {
+ // nothing to do
+ }
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/ResourceBundleHelper.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/ResourceBundleHelper.java
new file mode 100644
index 0000000..e57c7ed
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/ResourceBundleHelper.java
@@ -0,0 +1,61 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding.util;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.net.URLClassLoader;
+import java.util.Locale;
+import java.util.ResourceBundle;
+
+import org.eclipse.core.runtime.Platform;
+
+//TODO: rework this with new platform/runtime APIs (if still needed).
+
+public class ResourceBundleHelper {
+
+ /**
+ * Loads the bundle addressed by <code>resourceURI</code> for the default locale.
+ */
+ public static ResourceBundle getResourceBundle(String resourceURI) throws MalformedURLException, IOException {
+ return getResourceBundle(resourceURI, Locale.getDefault());
+ }
+
+ /**
+ * Loads a resource bundle from the directory part of <code>resourceURI</code>,
+ * using the text after the last '/' as the bundle base name. The URI is
+ * assumed to point to the local file system.
+ *
+ * @throws IllegalArgumentException if resourceURI contains no '/'
+ */
+ public static ResourceBundle getResourceBundle(String resourceURI, Locale targetLocale) throws MalformedURLException, IOException {
+ final int lastSlash = resourceURI.lastIndexOf("/"); //$NON-NLS-1$
+ if (lastSlash < 0) {
+ throw new IllegalArgumentException("Invalid resourceURI"); //$NON-NLS-1$
+ }
+
+ // Keep the trailing '/' on the directory part: per the ClassLoader Javadoc
+ // a URL ending in '/' is assumed to refer to a directory, otherwise it is
+ // assumed to refer to a JAR file which will be opened as needed.
+ final String directory = resourceURI.substring(0, lastSlash + 1);
+ final String baseName = resourceURI.substring(lastSlash + 1);
+
+ // a parentless class loader whose only class path entry is that directory
+ final URL[] searchPath = new URL[]{Platform.resolve(new URL(directory))};
+ final ClassLoader loader = new URLClassLoader(searchPath, null);
+
+ return ResourceBundle.getBundle(baseName, targetLocale, loader);
+ }
+}
+
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/SupportedJavaEncoding.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/SupportedJavaEncoding.java
new file mode 100644
index 0000000..36525a5
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/SupportedJavaEncoding.java
@@ -0,0 +1,288 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding.util;
+
+import java.util.ArrayList;
+import java.util.Hashtable;
+
+/**
+ * SupportedJavaEncoding is a utility class to provide IANA tag to java
+ * encoding identifier mappings. It also contains the human readable
+ * description for the IANA tag equivalent to be used in user interfaces. The
+ * description is NL aware based on locale. The data is populated via the
+ * charset.properties file only once, based on static initialization of the
+ * hashtables.
+ *
+ * The IANA tags are based on reference information found at the
+ * http://www.iana.org site. Specifically see
+ * http://www.iana.org/assignments/character-sets
+ *
+ * @deprecated - hard deprecated; will be removed within days. See
+ * CommonCharsetNames. A similar class may be needed to allow
+ * "overriding" of default mappings, but JRE 1.4 mappings seem to
+ * suffice.
+ */
+public class SupportedJavaEncoding {
+
+
+ // Pair of supported alias IANA/real IANA names
+ private static final String aliases[][] = {
+ // the key (1st param) is the alias IANA name, names are case-insensitive
+ // the value (2nd param) is the real IANA name, names are
+ // case-insensitive
+ // Japanese
+ {"X-EUC-JP", "EUC-JP"}, // EUC encoding,
+ // Japanese//$NON-NLS-2$//$NON-NLS-1$
+ {"X-SJIS", "SHIFT_JIS"}, // Shift-JIS,
+ // Japanese//$NON-NLS-2$//$NON-NLS-1$
+ {"ANSI_X3.4-1968", "US-ASCII"}}; //$NON-NLS-1$ //$NON-NLS-2$
+ private static ArrayList ianaEncodings = null, javaEncodings = null;
+
+ // The following is no longer used. Remove eventually
+
+ // Pair of supported IANA/Java Encoding names
+ // As for IANA name, see http://www.iana.org/assignments/character-sets
+ /**
+ * @deprecated - left here temporarily for documentation
+ */
+ static final String oldEncodings[][] = {
+ // the key (1st param) is the XML encoding name, names are
+ // case-insensitive
+ // the value (2nd param) is the Java encoding name, names are
+ // case-sensitive
+ // One XML encoding names can be assigned to the one Java
+ // Encoding, and
+ // everything else needs to be alias. See
+ // getIANAEncodingName().
+ {"US-ASCII", "ASCII"}, // US ASCII//$NON-NLS-2$//$NON-NLS-1$
+ // changed to Cp1252 for MS compatibility
+ //{"ISO-8859-1", "ISO8859_1"} // ISO Latin-1
+ {"ISO-8859-1", "Cp1252"}, // ISO
+ // Latin-1//$NON-NLS-2$//$NON-NLS-1$
+ {"ISO-8859-2", "ISO8859_2"}, // Central/East European (Slavic:
+ // Czech, Croat, German,
+ // Hungarian, Polish, Romanian,
+ // Slovak,
+ // Slovenian)//$NON-NLS-2$//$NON-NLS-1$
+ {"ISO-8859-3", "ISO8859_3"}, // Southern European (Esperanto,
+ // Galician, Maltese,
+ // Turkish)//$NON-NLS-2$//$NON-NLS-1$
+ {"ISO-8859-4", "ISO8859_4"}, // Cyrillic (Estonian, Latvian,
+ // Lithuanian)//$NON-NLS-2$//$NON-NLS-1$
+ {"ISO-8859-5", "ISO8859_5"}, // Cyrillic (Bulgarian,
+ // Byelorussian, Macedonian,
+ // Serbian,
+ // Ukrainian)//$NON-NLS-2$//$NON-NLS-1$
+ {"ISO-8859-6", "ISO8859_6"}, // Arabic(Logical)//$NON-NLS-2$//$NON-NLS-1$
+ {"WINDOWS-1256", "Cp1256"}, // Arabic//$NON-NLS-2$//$NON-NLS-1$
+ {"ISO-8859-7", "ISO8859_7"}, // Greek//$NON-NLS-2$//$NON-NLS-1$
+ {"ISO-8859-8-I", "ISO8859_8"}, // Hebrew(Logical)//$NON-NLS-2$//$NON-NLS-1$
+ // The above is tricky. but in the code conversion point of
+ // view,
+ // ISO-8 and ISO-8-I are same.
+ {"WINDOWS-1255", "Cp1255"}, // Hebrew//$NON-NLS-2$//$NON-NLS-1$
+ {"ISO-8859-9", "ISO8859_9"}, // Turkish//$NON-NLS-2$//$NON-NLS-1$
+ // Japanese
+ {"EUC-JP", "EUC_JP"}, // EUC encoding,
+ // Japanese//$NON-NLS-2$//$NON-NLS-1$
+ {"ISO-2022-JP", "ISO2022JP"}, // ISO 2022,
+ // Japanese//$NON-NLS-2$//$NON-NLS-1$
+ // changed for MS compatibility
+ //{"SHIFT_JIS", "SJIS"}, // Shift-JIS, Japanese
+ {"SHIFT_JIS", "MS932"}, // Shift-JIS,
+ // Japanese//$NON-NLS-2$//$NON-NLS-1$
+ // Korean
+ // changed for MS compatibility
+ //{"EUC-KR", "EUC_KR"}, // EUC encoding, Korean
+ {"EUC-KR", "MS949"}, // EUC encoding,
+ // Korean//$NON-NLS-2$//$NON-NLS-1$
+ {"ISO-2022-KR", "ISO2022KR"}, // ISO 2022,
+ // Korean//$NON-NLS-2$//$NON-NLS-1$
+ // Traditional Chinese
+ // changed for MS compatibility
+ //{"BIG5", "Big5"}, // Big5, Traditional Chinese
+ {"BIG5", "MS950"}, // Big5, Traditional
+ // Chinese//$NON-NLS-2$//$NON-NLS-1$
+ // Simplified Chinese(Use IANA MIME preferred name)
+ //{"GB_2312-80", "GBK"}, // GBK, Simplified Chinese
+ {"GB2312", "MS936"}, // GBK, Simplified
+ // Chinese//$NON-NLS-2$//$NON-NLS-1$
+ {"GB18030", "GB18030"}, // GB18030, new Chinese encoding
+ // //$NON-NLS-1$ //$NON-NLS-2$
+ // Thai
+ {"TIS-620", "TIS620"}, // Thai. Thai Industrial Standards
+ // Institute(TISI)//$NON-NLS-2$//$NON-NLS-1$
+ {"WINDOWS-874", "MS874"}, // Microsoft
+ // Thai//$NON-NLS-2$//$NON-NLS-1$
+ // Unicode
+ {"UTF-8", "UTF8"}, // ISO 10646/Unicode, one-byte
+ // encoding//$NON-NLS-2$//$NON-NLS-1$
+ {"UTF-16", "UnicodeBig"}, // ISO 10646/Unicode, two-byte
+ // encoding//$NON-NLS-2$//$NON-NLS-1$
+ {"UTF-16BE", "UnicodeBig"}, // Unicode
+ // BigEndian//$NON-NLS-2$//$NON-NLS-1$
+ {"UTF-16LE", "UnicodeLittle"} // Unicode
+ // LittleEndian//$NON-NLS-2$//$NON-NLS-1$
+ };
+ private static Hashtable supportedAliasNames = null;
+ private static Hashtable supportedEncodingDisplayNames = null;
+ private static Hashtable supportedEncodingNames = null;
+ private static Hashtable supportedIANAEncodingNames = null;
+
+ /**
+ * Creates an instance; the static lookup tables are filled by the first instance created.
+ */
+ public SupportedJavaEncoding() {
+ super();
+ initHashTables();
+ initSupportedAliasNames();
+ }
+
+ /**
+ * Returns the display (translated) string for an IANA encoding name.
+ *
+ * @param name
+ * IANA encoding name, looked up exactly as given; may be
+ * null
+ * @return the label registered for that name, or null if the name is
+ * null or unknown
+ */
+ public String getDisplayString(String name) {
+ final Object label = (name == null) ? null : supportedEncodingDisplayNames.get(name);
+ return (String) label;
+ }
+
+ /**
+ * Converts a Java converter name to the IANA encoding name.
+ *
+ * @param javaenc
+ * Java converter name; it is upper-cased for the lookup
+ * @return the IANA name registered for it, or null if javaenc is null or
+ * unknown
+ */
+ public String getIANAEncodingName(String javaenc) {
+ if (javaenc == null)
+ return null;
+ return (String) supportedIANAEncodingNames.get(javaenc.toUpperCase());
+ }
+
+ /**
+ * Converts an IANA encoding name to the Java converter name.
+ *
+ * @param iana
+ * IANA encoding name, or one of its known aliases
+ * @return the Java converter name found under the upper-cased real IANA
+ * name, or null if iana is null or has no entry
+ */
+ public String getJavaConverterName(String iana) {
+ // an alias is first replaced by the real IANA name
+ final String realName = getUniquefromAlias(iana);
+ if (realName == null)
+ return null;
+ return (String) supportedEncodingNames.get(realName.toUpperCase());
+ }
+
+ /**
+ * Returns all supported IANA encoding names.
+ *
+ * @return a new array holding the names in the order they were registered
+ */
+ public String[] getSupportedIANAEncodings() {
+ final int count = ianaEncodings.size();
+ final Object[] names = ianaEncodings.toArray(new String[count]);
+ return (String[]) names;
+ }
+
+ /**
+ * Returns all supported Java encoding names.
+ *
+ * @return a new array holding the names in the order they were registered
+ */
+ public String[] getSupportedJavaEncodings() {
+ final int count = javaEncodings.size();
+ final Object[] names = javaEncodings.toArray(new String[count]);
+ return (String[]) names;
+ }
+
+ /**
+ * Maps a possibly aliased IANA name (ex: x-..) to the real IANA name.
+ *
+ * @param string
+ * IANA name or alias; it is upper-cased for the lookup
+ * @return the real IANA name for a known alias, otherwise the argument
+ * itself (including null)
+ */
+ public String getUniquefromAlias(String string) {
+ if (string == null)
+ return null;
+ // convert alias IANA(x-...) to 'real' IANA name
+ final String realName = (String) supportedAliasNames.get(string.toUpperCase());
+ return (realName != null) ? realName : string;
+ }
+
+ /**
+ * Fills the shared lookup tables from the charset resources on first use.
+ * Java converter names are stored under the upper-cased IANA name, because
+ * getJavaConverterName() upper-cases its argument before the lookup.
+ */
+ private void initHashTables() {
+ if (supportedEncodingNames == null) {
+ supportedEncodingNames = new Hashtable();
+ supportedIANAEncodingNames = new Hashtable();
+ supportedEncodingDisplayNames = new Hashtable();
+ ianaEncodings = new ArrayList();
+ javaEncodings = new ArrayList();
+ String totalNumString = CharsetResourceHandler.getString("totalnumber");//$NON-NLS-1$
+ int totalNum = 0;
+ if (totalNumString.length() != 0) {
+ totalNum = Integer.valueOf(totalNumString).intValue();
+ }
+ for (int i = 0; i < totalNum; i++) {
+ // NOTE(review): confirm the resources define a codeset.N.java entry for every N
+ String enc = CharsetResourceHandler.getString("codeset." + i + ".java");//$NON-NLS-2$//$NON-NLS-1$
+ String iana = CharsetResourceHandler.getString("codeset." + i + ".iana");//$NON-NLS-2$//$NON-NLS-1$
+ String displayName = CharsetResourceHandler.getString("codeset." + i + ".label");//$NON-NLS-2$//$NON-NLS-1$
+
+ ianaEncodings.add(iana);
+ // the key must be upper case to be found again (e.g. for "windows-1256")
+ supportedEncodingNames.put(iana.toUpperCase(), enc);
+ supportedEncodingDisplayNames.put(iana, displayName);
+
+ // UTF-16BE is never registered as the reverse (Java -> IANA) mapping
+ if (iana.compareToIgnoreCase("UTF-16BE") != 0) { //$NON-NLS-1$
+ // the same java encoding can be used by multiple iana tags (eg, aliases or
+ // codepages that have the same codepoints); only the first, most popular, is kept
+ if (!supportedIANAEncodingNames.containsKey(enc.toUpperCase())) {
+ supportedIANAEncodingNames.put(enc.toUpperCase(), iana);
+ javaEncodings.add(enc);
+ }
+ }
+ }
+ }
+ }
+
+ /**
+ * Builds the alias-to-real IANA name table from <code>aliases</code>,
+ * unless the table already exists.
+ */
+ private void initSupportedAliasNames() {
+ if (supportedAliasNames != null) {
+ return;
+ }
+ supportedAliasNames = new Hashtable();
+ for (int row = 0; row < aliases.length; row++) {
+ supportedAliasNames.put(aliases[row][0], aliases[row][1]);
+ }
+ }
+
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/UnicodeBOMEncodingDetector.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/UnicodeBOMEncodingDetector.java
new file mode 100644
index 0000000..6cc5194
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/encoding/util/UnicodeBOMEncodingDetector.java
@@ -0,0 +1,213 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.encoding.util;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.Reader;
+import java.nio.charset.Charset;
+
+import org.eclipse.core.resources.IStorage;
+import org.eclipse.core.runtime.CoreException;
+import org.eclipse.wst.sse.core.internal.encoding.CodedIO;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
+import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;
+
+
+/**
+ * This is a "common function" class to decide if an input stream, is a
+ * unicode stream.
+ */
+public class UnicodeBOMEncodingDetector implements IResourceCharsetDetector {
+
+ //private static final String UTF_16_CHARSET_NAME = "UTF-16";
+ // //$NON-NLS-1$
+
+ public static class NotEnoughInputForBOMException extends IOException { // thrown by getNextByte when no further byte can be read
+
+ /**
+ * Default <code>serialVersionUID</code>
+ */
+ private static final long serialVersionUID = 1L;
+
+ public NotEnoughInputForBOMException() {
+ super();
+ }
+
+ public NotEnoughInputForBOMException(String s) { // s becomes the exception message
+ super(s);
+ }
+
+ }
+
+ private final static byte BB = (byte) 0xBB;
+ private final static byte BF = (byte) 0xBF;
+ private final static byte EF = (byte) 0xEF;
+ private final static byte FE = (byte) -2;
+
+ private final static byte FF = (byte) -1;
+ private static final String UTF_16BE_CHARSET_NAME = "UTF-16BE"; //$NON-NLS-1$
+ private static final String UTF_16LE_CHARSET_NAME = "UTF-16LE"; //$NON-NLS-1$
+
+ private static final String UTF_8_CHARSET_NAME = "UTF-8"; //$NON-NLS-1$
+
+ private InputStream fInputStream = null;
+ private boolean fNoBOMPossible;
+
+ private EncodingMemento checkForBOM(InputStream inputStream) {
+ EncodingMemento result = null;
+
+ try {
+ byte b1 = getNextByte(inputStream);
+ byte b2 = getNextByte(inputStream);
+ if (b1 == FE && b2 == FF) {
+ result = createEncodingMemento(UTF_16BE_CHARSET_NAME);
+ result.setUnicodeStream(true);
+ } else {
+ if (b1 == FF && b2 == FE) {
+ result = createEncodingMemento(UTF_16LE_CHARSET_NAME);
+ result.setUnicodeStream(true);
+ } else {
+ byte b3 = getNextByte((inputStream));
+ if (b1 == EF && b2 == BB && b3 == BF) {
+ result = createEncodingMemento(UTF_8_CHARSET_NAME);
+ result.setUTF83ByteBOMUsed(true);
+ }
+ }
+ }
+ } catch (NotEnoughInputForBOMException e) {
+ // This is sort of unexpected for normal cases, but can occur for
+ // empty
+ // streams. And, this can occur "normally" for non-BOM streams
+ // that
+ // have only two
+ // bytes, and for which those two bytes match the first two bytes
+ // of UTF-8
+ // BOM In any case, we'll simply return null;
+ result = null;
+ } catch (IOException e) {
+ // other errors should be impossible
+ throw new Error(e);
+ }
+
+ return result;
+ }
+
+ private EncodingMemento createEncodingMemento(String javaEncodingName) {
+ EncodingMemento encodingMemento = new EncodingMemento();
+ encodingMemento.setJavaCharsetName(javaEncodingName);
+ String ianaName = Charset.forName(javaEncodingName).name();
+ encodingMemento.setDetectedCharsetName(ianaName);
+ if (javaEncodingName.equals(UTF_8_CHARSET_NAME)) {
+ encodingMemento.setUTF83ByteBOMUsed(true);
+ }
+ return encodingMemento;
+ }
+
+ public String getEncoding() throws IOException {
+
+ return getEncodingMemento().getDetectedCharsetName();
+ }
+
+ /**
+ * Checks the configured input for a byte order mark.
+ *
+ * 0xFEFF UTF-16, big-endian 0xFFFE UTF-16, little-endian 0xEFBBBF UTF-8
+ * (BOM is optional)
+ *
+ * If a BOM is detected, the stream is left positioned after the BOM.
+ * NOTE(review): nothing in this class calls mark() or reset(), so when no
+ * BOM is found the bytes examined stay consumed; presumably the caller
+ * resets the stream - confirm.
+ *
+ * @return the memento describing the BOM's encoding, or null if no BOM
+ * was found or the input was a Reader that is not a ByteReader
+ * @throws IllegalStateException
+ * if no input has been set
+ * @throws IllegalArgumentException
+ * if the input stream does not support mark/reset
+ */
+ public EncodingMemento getEncodingMemento() {
+ if (fNoBOMPossible) {
+ // set(Reader) was given a reader that is not a ByteReader
+ return null;
+ }
+ if (fInputStream == null) {
+ throw new IllegalStateException("input must be set before use"); //$NON-NLS-1$
+ }
+ if (!fInputStream.markSupported()) {
+ throw new IllegalArgumentException("inputStream must be resetable"); //$NON-NLS-1$
+ }
+
+ return checkForBOM(fInputStream);
+ }
+
+ /**
+ * Reads one byte for the BOM check, but only if the stream reports it as available (so we won't block).
+ *
+ * @throws NotEnoughInputForBOMException if nothing is available or EOF is reached
+ */
+ private byte getNextByte(InputStream inputStream) throws IOException {
+ if (inputStream.available() > 0) {
+ int byteCharAsInt = inputStream.read();
+ // read() yields 0..255, or -1 at EOF; test before narrowing so EOF is not mistaken for the byte 0xFF
+ if (byteCharAsInt != -1)
+ return (byte) byteCharAsInt;
+ }
+ throw new NotEnoughInputForBOMException("typically not an error"); //$NON-NLS-1$
+ }
+
+ /**
+ * Always returns null.
+ */
+
+ public String getSpecDefaultEncoding() {
+ // There is no default for this case
+ return null;
+ }
+
+ /**
+ * Returns the detector to its initial state: no input stream, and a BOM considered possible.
+ */
+ private void resetAll() {
+ fNoBOMPossible = false;
+ fInputStream = null;
+
+ }
+
+ /**
+ * Clears any earlier state, then uses the given stream as the input to examine.
+ */
+
+ public void set(InputStream inputStream) {
+ resetAll();
+ fInputStream = inputStream;
+ }
+
+ public void set(IStorage iStorage) throws CoreException {
+ set(new BufferedInputStream(iStorage.getContents(), CodedIO.MAX_BUF_SIZE)); // NOTE(review): this class never closes the stream opened here - confirm who does
+
+ }
+
+ public void set(Reader reader) {
+ resetAll(); // drop state left by an earlier set(...) call, as set(InputStream) does
+ if (reader instanceof ByteReader) {
+ // a ByteReader gives access to the byte stream the BOM check needs
+ fInputStream = ((ByteReader) reader).fInputStream;
+ } else {
+ fNoBOMPossible = true;
+ }
+ }
+
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/exceptions/CharConversionErrorWithDetail.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/exceptions/CharConversionErrorWithDetail.java
new file mode 100644
index 0000000..8d5253c
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/exceptions/CharConversionErrorWithDetail.java
@@ -0,0 +1,43 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.exceptions;
+
+import java.nio.charset.CharacterCodingException;
+
+
+public class CharConversionErrorWithDetail extends CharacterCodingException {
+ /** Serialization version of this exception. */
+ private static final long serialVersionUID = 1L;
+ /** Name of the charset involved; null when none was given. */
+ private String fCharsetName;
+
+ /** Creates the exception without charset detail. */
+ public CharConversionErrorWithDetail() {
+ this(null);
+ }
+
+ /**
+ * Creates the exception for a conversion that involved the named charset.
+ *
+ * @param charsetName
+ * name later reported by getCharsetName()
+ */
+ public CharConversionErrorWithDetail(String charsetName) {
+ this.fCharsetName = charsetName;
+ }
+
+ /** @return the charset name given at construction, possibly null */
+ public String getCharsetName() {
+ return this.fCharsetName;
+ }
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/exceptions/MalformedInputExceptionWithDetail.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/exceptions/MalformedInputExceptionWithDetail.java
new file mode 100644
index 0000000..629567d
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/exceptions/MalformedInputExceptionWithDetail.java
@@ -0,0 +1,103 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.exceptions;
+
+import java.nio.charset.CharacterCodingException;
+
+
+/**
+ * Intended to be a more precise form of the MalformedInputException, where
+ * the character position and the attempted encoding can be provided.
+ */
+public class MalformedInputExceptionWithDetail extends CharacterCodingException {
+
+ /**
+ * Default <code>serialVersionUID</code>
+ */
+ private static final long serialVersionUID = 1L;
+ private int fCharPosition;
+ private String fDetectedCharsetName;
+ private boolean fExceededMax = false;
+ private String fJavaCharsetName;
+ private int fMaxBuffer;
+
+ /**
+ * Not public, so only subclasses can use it. If the attempted encoding and
+ * char position cannot be provided, use Java's MalformedInputException.
+ */
+ protected MalformedInputExceptionWithDetail() {
+ // Nothing to do
+ }
+
+ public MalformedInputExceptionWithDetail(String encodingName, int charPostion) {
+ this.fJavaCharsetName = encodingName;
+ this.fDetectedCharsetName = encodingName;
+ this.fCharPosition = charPostion;
+ }
+
+ public MalformedInputExceptionWithDetail(String attemptedJavaEncoding, String attemptedIANAEncoding, int charPostion) {
+ this.fJavaCharsetName = attemptedJavaEncoding;
+ this.fDetectedCharsetName = attemptedIANAEncoding;
+ this.fCharPosition = charPostion;
+ }
+
+ /**
+ * A charPostion of -1 may mean that the character position exceeded the
+ * maximum buffer size, maxBuffer; in that case exceededMax should be true.
+ */
+ public MalformedInputExceptionWithDetail(String attemptedJavaEncoding, String attemptedIANAEncoding, int charPostion, boolean exceededMax, int maxBuffer) {
+ this.fJavaCharsetName = attemptedJavaEncoding;
+ this.fDetectedCharsetName = attemptedIANAEncoding;
+ this.fCharPosition = charPostion;
+ this.fExceededMax = exceededMax;
+ this.fMaxBuffer = maxBuffer;
+ }
+
+ /** Returns the attempted IANA encoding name supplied to the constructor.
+ */
+ public java.lang.String getAttemptedIANAEncoding() {
+ return fDetectedCharsetName;
+ }
+
+ /** Returns the attempted Java encoding name supplied to the constructor.
+ */
+ public java.lang.String getAttemptedJavaEncoding() {
+ return fJavaCharsetName;
+ }
+
+ /**
+ * @return the character position supplied to the constructor
+ */
+ public int getCharPosition() {
+ return fCharPosition;
+ }
+
+ /**
+ * Returns the maximum buffer size supplied to the constructor.
+ *
+ * @return the maximum buffer size, or 0 if none was supplied
+ */
+ public int getMaxBuffer() {
+ return fMaxBuffer;
+ }
+
+ /**
+ * Returns the exceededMax flag supplied to the constructor.
+ *
+ * @return the exceededMax flag; false if none was supplied
+ */
+ public boolean isExceededMax() {
+ return fExceededMax;
+ }
+
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/exceptions/MalformedOutputExceptionWithDetail.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/exceptions/MalformedOutputExceptionWithDetail.java
new file mode 100644
index 0000000..6cf9f7c
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/exceptions/MalformedOutputExceptionWithDetail.java
@@ -0,0 +1,44 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.exceptions;
+
+
+public class MalformedOutputExceptionWithDetail extends MalformedInputExceptionWithDetail {
+
+ /**
+ * Default <code>serialVersionUID</code>
+ */
+ private static final long serialVersionUID = 1L;
+
+ /**
+ * Disallow default constructor. If the attempted encoding and char position
+ * cannot be provided, throw a regular malformed-data exception instead.
+ */
+ private MalformedOutputExceptionWithDetail() {
+ // default constructor is disallowed, since if
+ // extra info can not be provided, the regular
+ // Malformed exception should be thrown
+ }
+
+ /**
+ * Creates the exception, passing all three values to the superclass.
+ *
+ * @param attemptedJavaEncoding the Java name of the attempted encoding
+ * @param attemptedIANAEncoding the IANA name of the attempted encoding
+ * @param charPostion the character position to report
+ */
+ public MalformedOutputExceptionWithDetail(String attemptedJavaEncoding, String attemptedIANAEncoding, int charPostion) {
+ super(attemptedJavaEncoding, attemptedIANAEncoding, charPostion);
+ }
+
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/exceptions/UnsupportedCharsetExceptionWithDetail.java b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/exceptions/UnsupportedCharsetExceptionWithDetail.java
new file mode 100644
index 0000000..7cdc40b
--- /dev/null
+++ b/bundles/org.eclipse.wst.sse.core/src-encoding/org/eclipse/wst/sse/core/internal/exceptions/UnsupportedCharsetExceptionWithDetail.java
@@ -0,0 +1,47 @@
+/*******************************************************************************
+ * Copyright (c) 2001, 2004 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ *
+ * Contributors:
+ * IBM Corporation - initial API and implementation
+ * Jens Lukowski/Innoopract - initial renaming/restructuring
+ *
+ *******************************************************************************/
+package org.eclipse.wst.sse.core.internal.exceptions;
+
+import java.nio.charset.UnsupportedCharsetException;
+
+import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
+
+
+/**
+ * This is intended for the same purpose as its superclass, but provides
+ * more information than just the name in error. This is especially useful
+ * for "UIs" which can present users with the error, and the
+ * "appropriateDefault" that can be used for a particular input.
+ */
+public class UnsupportedCharsetExceptionWithDetail extends UnsupportedCharsetException {
+
+ /**
+ * Default <code>serialVersionUID</code>
+ */
+ private static final long serialVersionUID = 1L;
+ private EncodingMemento fEncodingMementio;
+
+ public UnsupportedCharsetExceptionWithDetail(EncodingMemento encodingMemento) {
+ this(encodingMemento.getDetectedCharsetName());
+ fEncodingMementio = encodingMemento;
+ }
+
+ protected UnsupportedCharsetExceptionWithDetail(String charsetName) {
+ super(charsetName);
+ }
+
+ public EncodingMemento getEncodingMemento() {
+ return fEncodingMementio;
+ }
+
+}
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/AbstractModelLoader.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/AbstractModelLoader.java
index 01cfed6..337b1d8 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/AbstractModelLoader.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/AbstractModelLoader.java
@@ -22,11 +22,11 @@
import org.eclipse.core.resources.IFile;
import org.eclipse.core.runtime.CoreException;
import org.eclipse.jface.text.IDocumentExtension3;
-import org.eclipse.wst.common.encoding.EncodingMemento;
-import org.eclipse.wst.common.encoding.EncodingRule;
import org.eclipse.wst.sse.core.document.IDocumentLoader;
import org.eclipse.wst.sse.core.document.IEncodedDocument;
import org.eclipse.wst.sse.core.internal.Logger;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingRule;
import org.eclipse.wst.sse.core.internal.text.BasicStructuredDocument;
import org.eclipse.wst.sse.core.parser.BlockMarker;
import org.eclipse.wst.sse.core.parser.BlockTagParser;
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/AbstractStructuredModel.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/AbstractStructuredModel.java
index 7a3b465..508f654 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/AbstractStructuredModel.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/AbstractStructuredModel.java
@@ -24,7 +24,6 @@
import org.eclipse.core.runtime.IPath;
import org.eclipse.core.runtime.Platform;
import org.eclipse.core.runtime.jobs.ILock;
-import org.eclipse.wst.common.encoding.EncodingRule;
import org.eclipse.wst.sse.core.document.ILockable;
import org.eclipse.wst.sse.core.events.AboutToBeChangeEvent;
import org.eclipse.wst.sse.core.events.IModelAboutToBeChangedListener;
@@ -38,6 +37,7 @@
import org.eclipse.wst.sse.core.exceptions.SourceEditingRuntimeException;
import org.eclipse.wst.sse.core.internal.Logger;
import org.eclipse.wst.sse.core.internal.SSECorePlugin;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingRule;
import org.eclipse.wst.sse.core.modelhandler.IModelHandler;
import org.eclipse.wst.sse.core.text.IStructuredDocument;
import org.eclipse.wst.sse.core.undo.IStructuredTextUndoManager;
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/IModelManager.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/IModelManager.java
index 31c83e2..9580c58 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/IModelManager.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/IModelManager.java
@@ -20,9 +20,9 @@
import org.eclipse.core.resources.IFile;
import org.eclipse.core.runtime.CoreException;
import org.eclipse.jface.text.IDocument;
-import org.eclipse.wst.common.encoding.EncodingRule;
import org.eclipse.wst.sse.core.exceptions.ResourceAlreadyExists;
import org.eclipse.wst.sse.core.exceptions.ResourceInUse;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingRule;
import org.eclipse.wst.sse.core.text.IStructuredDocument;
import org.eclipse.wst.sse.core.util.URIResolver;
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/IStructuredModel.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/IStructuredModel.java
index 90ea167..445e839 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/IStructuredModel.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/IStructuredModel.java
@@ -21,9 +21,9 @@
import org.eclipse.core.resources.IResource;
import org.eclipse.core.runtime.CoreException;
import org.eclipse.core.runtime.IAdaptable;
-import org.eclipse.wst.common.encoding.EncodingRule;
import org.eclipse.wst.sse.core.exceptions.ResourceAlreadyExists;
import org.eclipse.wst.sse.core.exceptions.ResourceInUse;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingRule;
import org.eclipse.wst.sse.core.modelhandler.IModelHandler;
import org.eclipse.wst.sse.core.text.IStructuredDocument;
import org.eclipse.wst.sse.core.undo.IStructuredTextUndoManager;
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/ModelDumper.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/ModelDumper.java
index 7cf9cae..4847246 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/ModelDumper.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/ModelDumper.java
@@ -20,7 +20,7 @@
import org.eclipse.core.resources.IFile;
import org.eclipse.core.runtime.CoreException;
-import org.eclipse.wst.common.encoding.EncodingRule;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingRule;
/**
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/ModelLoader.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/ModelLoader.java
index b44c521..045be31 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/ModelLoader.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/ModelLoader.java
@@ -18,7 +18,7 @@
import org.eclipse.core.resources.IFile;
import org.eclipse.core.runtime.CoreException;
-import org.eclipse.wst.common.encoding.EncodingRule;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingRule;
/**
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/ModelManagerImpl.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/ModelManagerImpl.java
index 2000ffe..f7f5819 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/ModelManagerImpl.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/ModelManagerImpl.java
@@ -44,13 +44,6 @@
import org.eclipse.text.edits.MultiTextEdit;
import org.eclipse.text.edits.ReplaceEdit;
import org.eclipse.text.edits.TextEdit;
-import org.eclipse.wst.common.encoding.CodedIO;
-import org.eclipse.wst.common.encoding.CodedStreamCreator;
-import org.eclipse.wst.common.encoding.CommonEncodingPreferenceNames;
-import org.eclipse.wst.common.encoding.ContentBasedPreferenceGateway;
-import org.eclipse.wst.common.encoding.EncodingMemento;
-import org.eclipse.wst.common.encoding.EncodingRule;
-import org.eclipse.wst.common.encoding.exceptions.MalformedOutputExceptionWithDetail;
import org.eclipse.wst.sse.core.document.DocumentReader;
import org.eclipse.wst.sse.core.document.IDocumentLoader;
import org.eclipse.wst.sse.core.document.IEncodedDocument;
@@ -61,6 +54,13 @@
import org.eclipse.wst.sse.core.internal.Logger;
import org.eclipse.wst.sse.core.internal.NullMemento;
import org.eclipse.wst.sse.core.internal.SSECorePlugin;
+import org.eclipse.wst.sse.core.internal.encoding.CodedIO;
+import org.eclipse.wst.sse.core.internal.encoding.CodedStreamCreator;
+import org.eclipse.wst.sse.core.internal.encoding.CommonEncodingPreferenceNames;
+import org.eclipse.wst.sse.core.internal.encoding.ContentBasedPreferenceGateway;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingRule;
+import org.eclipse.wst.sse.core.internal.exceptions.MalformedOutputExceptionWithDetail;
import org.eclipse.wst.sse.core.internal.modelhandler.ModelHandlerRegistry;
import org.eclipse.wst.sse.core.modelhandler.IModelHandler;
import org.eclipse.wst.sse.core.text.IStructuredDocument;
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/NullModelManager.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/NullModelManager.java
index 5be0556..bb63c5d 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/NullModelManager.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/NullModelManager.java
@@ -20,9 +20,9 @@
import org.eclipse.core.resources.IFile;
import org.eclipse.core.runtime.CoreException;
import org.eclipse.jface.text.IDocument;
-import org.eclipse.wst.common.encoding.EncodingRule;
import org.eclipse.wst.sse.core.exceptions.ResourceAlreadyExists;
import org.eclipse.wst.sse.core.exceptions.ResourceInUse;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingRule;
import org.eclipse.wst.sse.core.text.IStructuredDocument;
import org.eclipse.wst.sse.core.util.URIResolver;
@@ -284,7 +284,7 @@
* (non-Javadoc)
*
* @see org.eclipse.wst.sse.core.IModelManager#getModelForEdit(org.eclipse.core.resources.IFile,
- * org.eclipse.wst.common.encoding.EncodingRule)
+ * org.eclipse.wst.sse.core.internal.encoding.EncodingRule)
*/
public IStructuredModel getModelForEdit(IFile iFile, EncodingRule encodingRule) throws UnsupportedEncodingException, IOException, CoreException {
@@ -327,7 +327,7 @@
* (non-Javadoc)
*
* @see org.eclipse.wst.sse.core.IModelManager#getModelForRead(org.eclipse.core.resources.IFile,
- * org.eclipse.wst.common.encoding.EncodingRule)
+ * org.eclipse.wst.sse.core.internal.encoding.EncodingRule)
*/
public IStructuredModel getModelForRead(IFile iFile, EncodingRule encodingRule) throws UnsupportedEncodingException, IOException, CoreException {
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/AbstractDocumentLoader.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/AbstractDocumentLoader.java
index 87997df..f5a46bc 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/AbstractDocumentLoader.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/AbstractDocumentLoader.java
@@ -26,14 +26,14 @@
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.IDocumentExtension3;
import org.eclipse.jface.text.IDocumentPartitioner;
-import org.eclipse.wst.common.encoding.CodedIO;
-import org.eclipse.wst.common.encoding.CodedReaderCreator;
-import org.eclipse.wst.common.encoding.ContentTypeEncodingPreferences;
-import org.eclipse.wst.common.encoding.EncodingMemento;
-import org.eclipse.wst.common.encoding.EncodingRule;
-import org.eclipse.wst.common.encoding.exceptions.MalformedInputExceptionWithDetail;
import org.eclipse.wst.sse.core.internal.document.NullStructuredDocumentPartitioner;
import org.eclipse.wst.sse.core.internal.document.TextUtilities;
+import org.eclipse.wst.sse.core.internal.encoding.CodedIO;
+import org.eclipse.wst.sse.core.internal.encoding.CodedReaderCreator;
+import org.eclipse.wst.sse.core.internal.encoding.ContentTypeEncodingPreferences;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingRule;
+import org.eclipse.wst.sse.core.internal.exceptions.MalformedInputExceptionWithDetail;
import org.eclipse.wst.sse.core.text.IStructuredDocument;
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/IDocumentCharsetDetector.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/IDocumentCharsetDetector.java
index fcd1ce6..660dc44 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/IDocumentCharsetDetector.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/IDocumentCharsetDetector.java
@@ -13,7 +13,7 @@
package org.eclipse.wst.sse.core.document;
import org.eclipse.jface.text.IDocument;
-import org.eclipse.wst.common.encoding.IResourceCharsetDetector;
+import org.eclipse.wst.sse.core.internal.encoding.IResourceCharsetDetector;
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/IDocumentLoader.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/IDocumentLoader.java
index 3d73a04..419706c 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/IDocumentLoader.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/IDocumentLoader.java
@@ -20,7 +20,7 @@
import org.eclipse.core.resources.IFile;
import org.eclipse.core.runtime.CoreException;
import org.eclipse.jface.text.IDocumentPartitioner;
-import org.eclipse.wst.common.encoding.EncodingRule;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingRule;
public interface IDocumentLoader {
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/IEncodedDocument.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/IEncodedDocument.java
index 95da1bb..1f6db4f 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/IEncodedDocument.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/IEncodedDocument.java
@@ -13,7 +13,7 @@
package org.eclipse.wst.sse.core.document;
import org.eclipse.jface.text.IDocument;
-import org.eclipse.wst.common.encoding.EncodingMemento;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/StructuredDocumentLoader.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/StructuredDocumentLoader.java
index 25e5a73..ff7a2ab 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/StructuredDocumentLoader.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/document/StructuredDocumentLoader.java
@@ -29,9 +29,9 @@
import org.eclipse.core.runtime.QualifiedName;
import org.eclipse.core.runtime.content.IContentDescription;
import org.eclipse.jface.text.IDocument;
-import org.eclipse.wst.common.encoding.CodedIO;
-import org.eclipse.wst.common.encoding.IContentDescriptionExtended;
-import org.eclipse.wst.common.encoding.exceptions.MalformedInputExceptionWithDetail;
+import org.eclipse.wst.sse.core.internal.encoding.CodedIO;
+import org.eclipse.wst.sse.core.internal.encoding.IContentDescriptionExtended;
+import org.eclipse.wst.sse.core.internal.exceptions.MalformedInputExceptionWithDetail;
public class StructuredDocumentLoader {
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/NullMemento.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/NullMemento.java
index 8eb1346..af3c72c 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/NullMemento.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/NullMemento.java
@@ -12,8 +12,8 @@
*******************************************************************************/
package org.eclipse.wst.sse.core.internal;
-import org.eclipse.wst.common.encoding.EncodingMemento;
-import org.eclipse.wst.common.encoding.NonContentBasedEncodingRules;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
+import org.eclipse.wst.sse.core.internal.encoding.NonContentBasedEncodingRules;
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/SSECorePlugin.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/SSECorePlugin.java
index cf9dae5..e243ee5 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/SSECorePlugin.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/SSECorePlugin.java
@@ -18,11 +18,11 @@
import org.eclipse.core.runtime.Plugin;
import org.eclipse.core.runtime.Preferences;
-import org.eclipse.wst.common.encoding.CommonEncodingPreferenceNames;
import org.eclipse.wst.sse.core.IModelManager;
import org.eclipse.wst.sse.core.IModelManagerPlugin;
import org.eclipse.wst.sse.core.StructuredModelManager;
import org.eclipse.wst.sse.core.internal.builder.StructuredDocumentBuilder;
+import org.eclipse.wst.sse.core.internal.encoding.CommonEncodingPreferenceNames;
import org.eclipse.wst.sse.core.internal.modelhandler.ModelHandlerRegistry;
import org.eclipse.wst.sse.core.preferences.CommonModelPreferenceNames;
import org.osgi.framework.BundleContext;
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/cleanup/AbstractStructuredCleanupProcessor.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/cleanup/AbstractStructuredCleanupProcessor.java
index 610fcd1..d11004a 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/cleanup/AbstractStructuredCleanupProcessor.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/cleanup/AbstractStructuredCleanupProcessor.java
@@ -26,11 +26,11 @@
import org.eclipse.jface.text.IDocument;
import org.eclipse.jface.text.IRegion;
import org.eclipse.jface.text.TextUtilities;
-import org.eclipse.wst.common.encoding.CommonEncodingPreferenceNames;
import org.eclipse.wst.sse.core.IStructuredModel;
import org.eclipse.wst.sse.core.StructuredModelManager;
import org.eclipse.wst.sse.core.format.IStructuredFormatProcessor;
import org.eclipse.wst.sse.core.internal.Logger;
+import org.eclipse.wst.sse.core.internal.encoding.CommonEncodingPreferenceNames;
import org.eclipse.wst.sse.core.text.IStructuredDocument;
import org.w3c.dom.Attr;
import org.w3c.dom.Node;
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/modelhandler/ModelHandlerRegistry.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/modelhandler/ModelHandlerRegistry.java
index 383af34..67bad50 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/modelhandler/ModelHandlerRegistry.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/modelhandler/ModelHandlerRegistry.java
@@ -23,8 +23,8 @@
import org.eclipse.core.runtime.content.IContentDescription;
import org.eclipse.core.runtime.content.IContentType;
import org.eclipse.core.runtime.content.IContentTypeManager;
-import org.eclipse.wst.common.encoding.CodedIO;
import org.eclipse.wst.sse.core.internal.Logger;
+import org.eclipse.wst.sse.core.internal.encoding.CodedIO;
import org.eclipse.wst.sse.core.modelhandler.IModelHandler;
import org.eclipse.wst.sse.core.util.Utilities;
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/text/BasicStructuredDocument.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/text/BasicStructuredDocument.java
index bf8081a..e83b509 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/text/BasicStructuredDocument.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/text/BasicStructuredDocument.java
@@ -45,7 +45,6 @@
import org.eclipse.jface.text.Position;
import org.eclipse.jface.text.SequentialRewriteTextStore;
import org.eclipse.jface.text.TypedRegion;
-import org.eclipse.wst.common.encoding.EncodingMemento;
import org.eclipse.wst.sse.core.document.StructuredDocumentFactory;
import org.eclipse.wst.sse.core.events.AboutToBeChangeEvent;
import org.eclipse.wst.sse.core.events.IModelAboutToBeChangedListener;
@@ -59,6 +58,7 @@
import org.eclipse.wst.sse.core.events.StructuredDocumentRegionsReplacedEvent;
import org.eclipse.wst.sse.core.exceptions.SourceEditingRuntimeException;
import org.eclipse.wst.sse.core.internal.Logger;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
import org.eclipse.wst.sse.core.internal.undo.StructuredTextUndoManager;
import org.eclipse.wst.sse.core.parser.RegionParser;
import org.eclipse.wst.sse.core.text.DeleteEqualPositionUpdater;
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/text/MinimalDocument.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/text/MinimalDocument.java
index 87a1fe8..1bfb326 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/text/MinimalDocument.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/internal/text/MinimalDocument.java
@@ -22,7 +22,6 @@
import org.eclipse.jface.text.ITypedRegion;
import org.eclipse.jface.text.Position;
import org.eclipse.jface.text.TypedRegion;
-import org.eclipse.wst.common.encoding.EncodingMemento;
import org.eclipse.wst.sse.core.events.IModelAboutToBeChangedListener;
import org.eclipse.wst.sse.core.events.IStructuredDocumentListener;
import org.eclipse.wst.sse.core.events.NewModelEvent;
@@ -30,6 +29,7 @@
import org.eclipse.wst.sse.core.internal.Logger;
import org.eclipse.wst.sse.core.internal.NotImplementedException;
import org.eclipse.wst.sse.core.internal.document.NullStructuredDocumentPartitioner;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
import org.eclipse.wst.sse.core.parser.RegionParser;
import org.eclipse.wst.sse.core.text.IStructuredDocument;
import org.eclipse.wst.sse.core.text.IStructuredDocumentRegion;
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/preferences/CommonModelPreferenceNames.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/preferences/CommonModelPreferenceNames.java
index 13f15f3..4ab7989 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/preferences/CommonModelPreferenceNames.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/preferences/CommonModelPreferenceNames.java
@@ -12,7 +12,7 @@
*******************************************************************************/
package org.eclipse.wst.sse.core.preferences;
-import org.eclipse.wst.common.encoding.CommonEncodingPreferenceNames;
+import org.eclipse.wst.sse.core.internal.encoding.CommonEncodingPreferenceNames;
/**
* Here is a description of how each common model file preference is used.
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/text/IStructuredDocument.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/text/IStructuredDocument.java
index 7e2cb01..9e595b2 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/text/IStructuredDocument.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/text/IStructuredDocument.java
@@ -14,12 +14,12 @@
import org.eclipse.core.runtime.IAdaptable;
import org.eclipse.jface.text.IDocumentExtension;
-import org.eclipse.wst.common.encoding.EncodingMemento;
import org.eclipse.wst.sse.core.document.IEncodedDocument;
import org.eclipse.wst.sse.core.events.IModelAboutToBeChangedListener;
import org.eclipse.wst.sse.core.events.IStructuredDocumentListener;
import org.eclipse.wst.sse.core.events.NewModelEvent;
import org.eclipse.wst.sse.core.events.StructuredDocumentEvent;
+import org.eclipse.wst.sse.core.internal.encoding.EncodingMemento;
import org.eclipse.wst.sse.core.parser.RegionParser;
import org.eclipse.wst.sse.core.undo.IStructuredTextUndoManager;
diff --git a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/util/Utilities.java b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/util/Utilities.java
index c3c6a3d..b84e219 100644
--- a/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/util/Utilities.java
+++ b/bundles/org.eclipse.wst.sse.core/src/org/eclipse/wst/sse/core/util/Utilities.java
@@ -17,8 +17,8 @@
import java.io.BufferedInputStream;
import java.io.InputStream;
-import org.eclipse.wst.common.encoding.CodedIO;
-import org.eclipse.wst.common.encoding.internal.BufferedLimitedStream;
+import org.eclipse.wst.sse.core.internal.encoding.CodedIO;
+import org.eclipse.wst.sse.core.internal.encoding.util.BufferedLimitedStream;