Skip to main content
about summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- jetty-util/src/main/java/org/eclipse/jetty/util/Utf8Appendable.java 225
-rw-r--r-- jetty-util/src/main/java/org/eclipse/jetty/util/Utf8StringBuffer.java 54
-rw-r--r-- jetty-util/src/main/java/org/eclipse/jetty/util/Utf8StringBuilder.java 48
-rw-r--r-- jetty-util/src/test/java/org/eclipse/jetty/util/Utf8StringBufferTest.java 112
-rw-r--r-- jetty-util/src/test/java/org/eclipse/jetty/util/Utf8StringBuilderTest.java 118
5 files changed, 265 insertions, 292 deletions
diff --git a/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8Appendable.java b/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8Appendable.java
index b869f431b9..c646979de5 100644
--- a/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8Appendable.java
+++ b/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8Appendable.java
@@ -1,180 +1,181 @@
+// ========================================================================
+// Copyright (c) 2006-2009 Mort Bay Consulting Pty. Ltd.
+// ------------------------------------------------------------------------
+// All rights reserved. This program and the accompanying materials
+// are made available under the terms of the Eclipse Public License v1.0
+// and Apache License v2.0 which accompanies this distribution.
+// The Eclipse Public License is available at
+// http://www.eclipse.org/legal/epl-v10.html
+// The Apache License v2.0 is available at
+// http://www.opensource.org/licenses/apache2.0.php
+// You may elect to redistribute this code under either of these licenses.
+// ========================================================================
package org.eclipse.jetty.util;
import java.io.IOException;
-import java.util.IllegalFormatCodePointException;
+/* ------------------------------------------------------------ */
+/**
+ * Utf8 Appendable abstract base class
+ *
+ * This abstract class wraps a standard {@link java.lang.Appendable} and provides methods to append UTF-8 encoded bytes, that are converted into characters.
+ *
+ * This class is stateful and up to 4 calls to {@link #append(byte)} may be needed before a character is appended to the string buffer.
+ *
+ * The UTF-8 decoding is done by this class and no additional buffers or Readers are used. The UTF-8 code was inspired by
+ * http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+ *
+ * License information for Bjoern Hoehrmann's code:
+ *
+ * Copyright (c) 2008-2009 Bjoern Hoehrmann <bjoern@hoehrmann.de>
+ * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
+ * FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
+ * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ **/
public abstract class Utf8Appendable
{
private final char REPLACEMENT = '\ufffd';
+ private static final int UTF8_ACCEPT = 0;
+ private static final int UTF8_REJECT = 12;
+
protected final Appendable _appendable;
- protected int _expectedContinuationBytes;
- protected int _codePoint;
- protected int _minCodePoint;
+ protected int _state = UTF8_ACCEPT;
+
+ private static final byte[] BYTE_TABLE =
+ {
+ // The first part of the table maps bytes to character classes, in order
+ // to reduce the size of the transition table and to create bitmasks.
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 10,3,3,3,3,3,3,3,3,3,3,3,3,4,3,3, 11,6,6,6,5,8,8,8,8,8,8,8,8,8,8,8
+ };
+
+ private static final byte[] TRANS_TABLE =
+ {
+ // The second part is a transition table that maps a combination
+ // of a state of the automaton and a character class to a state.
+ 0,12,24,36,60,96,84,12,12,12,48,72, 12,12,12,12,12,12,12,12,12,12,12,12,
+ 12, 0,12,12,12,12,12, 0,12, 0,12,12, 12,24,12,12,12,12,12,24,12,24,12,12,
+ 12,12,12,12,12,12,12,24,12,12,12,12, 12,24,12,12,12,12,12,12,12,24,12,12,
+ 12,12,12,12,12,12,12,36,12,36,12,12, 12,36,12,12,12,12,12,36,12,36,12,12,
+ 12,36,12,12,12,12,12,12,12,12,12,12
+ };
+
+ private int _codep;
public Utf8Appendable(Appendable appendable)
{
- _appendable=appendable;
+ _appendable = appendable;
}
public abstract int length();
-
+
+ protected void reset()
+ {
+ _state = UTF8_ACCEPT;
+ }
+
public void append(byte b)
{
try
{
appendByte(b);
}
- catch(IOException e)
+ catch (IOException e)
{
throw new RuntimeException(e);
}
}
-
- public void append(byte[] b,int offset, int length)
+
+ public void append(byte[] b, int offset, int length)
{
try
{
- int end=offset+length;
- for (int i=offset; i<end;i++)
+ int end = offset + length;
+ for (int i = offset; i < end; i++)
appendByte(b[i]);
}
- catch(IOException e)
+ catch (IOException e)
{
throw new RuntimeException(e);
}
}
- public boolean append(byte[] b,int offset, int length, int maxChars)
+ public boolean append(byte[] b, int offset, int length, int maxChars)
{
try
{
- int end=offset+length;
- for (int i=offset; i<end;i++)
+ int end = offset + length;
+ for (int i = offset; i < end; i++)
{
- if (length()>maxChars)
+ if (length() > maxChars)
return false;
appendByte(b[i]);
}
return true;
}
- catch(IOException e)
+ catch (IOException e)
{
throw new RuntimeException(e);
}
}
-
+
protected void appendByte(byte b) throws IOException
{
- // Check for invalid bytes
- if (b==(byte)0xc0 || b==(byte)0xc1 || (int)b>=0xf5)
+
+ if (b > 0 && isUtf8SequenceComplete())
{
- _appendable.append(REPLACEMENT);
- _expectedContinuationBytes=0;
- _codePoint=0;
- throw new NotUtf8Exception();
+ _appendable.append((char)(b & 0xFF));
}
-
- // Is it plain ASCII?
- if (b>=0)
- {
- // Were we expecting a continuation byte?
- if (_expectedContinuationBytes>0)
- {
- _appendable.append(REPLACEMENT);
- _expectedContinuationBytes=0;
- _codePoint=0;
- throw new NotUtf8Exception();
- }
- else
- _appendable.append((char)(0x7f&b));
- }
- // Else is this a start byte
- else if (_expectedContinuationBytes==0)
+ else
{
- if ((b & 0xe0) == 0xc0)
- {
- //110xxxxx
- _expectedContinuationBytes=1;
- _codePoint=b&0x1f;
- _minCodePoint=0x80;
- }
- else if ((b & 0xf0) == 0xe0)
- {
- //1110xxxx
- _expectedContinuationBytes=2;
- _codePoint=b&0x0f;
- _minCodePoint=0x800;
- }
- else if ((b & 0xf8) == 0xf0)
- {
- //11110xxx
- _expectedContinuationBytes=3;
- _codePoint=b&0x07;
- _minCodePoint=0x10000;
- }
- else if ((b & 0xfc) == 0xf8)
- {
- //111110xx
- _expectedContinuationBytes=4;
- _codePoint=b&0x03;
- _minCodePoint=0x200000;
- }
- else if ((b & 0xfe) == 0xfc)
+ int i = b & 0xFF;
+ int type = BYTE_TABLE[i];
+ _codep = isUtf8SequenceComplete() ? (0xFF >> type) & i : (i & 0x3F) | (_codep << 6);
+ _state = TRANS_TABLE[_state + type];
+
+ if (isUtf8SequenceComplete())
{
- //1111110x
- _expectedContinuationBytes=5;
- _codePoint=b&0x01;
- _minCodePoint=0x400000;
+ if (_codep < Character.MIN_HIGH_SURROGATE)
+ {
+ _appendable.append((char)_codep);
+ }
+ else
+ {
+ for (char c : Character.toChars(_codep))
+ _appendable.append(c);
+ }
}
- else
+ else if (_state == UTF8_REJECT)
{
+ _state = UTF8_ACCEPT;
_appendable.append(REPLACEMENT);
- _expectedContinuationBytes=0;
- _codePoint=0;
throw new NotUtf8Exception();
}
}
- // else is this a continuation character
- else if ((b&0xc0)==0x80)
- {
- // 10xxxxxx
- _codePoint=(_codePoint<<6)|(b&0x3f);
-
- // was that the last continuation?
- if (--_expectedContinuationBytes==0)
- {
- // If this a valid unicode point?
- if (_codePoint<_minCodePoint || (_codePoint>=0xD800 && _codePoint<=0xDFFF))
- {
- _appendable.append(REPLACEMENT);
- _expectedContinuationBytes=0;
- _codePoint=0;
- throw new NotUtf8Exception();
- }
-
- _minCodePoint=0;
- char[] chars = Character.toChars(_codePoint);
- for (char c : chars)
- _appendable.append(c);
- }
- }
- // Else this is not a continuation character
- else
- {
- // ! 10xxxxxx
- _appendable.append(REPLACEMENT);
- _expectedContinuationBytes=0;
- _codePoint=0;
- throw new NotUtf8Exception();
- }
}
+ protected boolean isUtf8SequenceComplete()
+ {
+ return _state == UTF8_ACCEPT;
+ }
public static class NotUtf8Exception extends IllegalArgumentException
{
public NotUtf8Exception()
{
- super("!UTF-8");
+ super("Not valid UTF8!");
}
}
-} \ No newline at end of file
+}
diff --git a/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8StringBuffer.java b/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8StringBuffer.java
index bd730deabf..b86058e584 100644
--- a/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8StringBuffer.java
+++ b/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8StringBuffer.java
@@ -4,71 +4,73 @@
// All rights reserved. This program and the accompanying materials
// are made available under the terms of the Eclipse Public License v1.0
// and Apache License v2.0 which accompanies this distribution.
-// The Eclipse Public License is available at
+// The Eclipse Public License is available at
// http://www.eclipse.org/legal/epl-v10.html
// The Apache License v2.0 is available at
// http://www.opensource.org/licenses/apache2.0.php
-// You may elect to redistribute this code under either of these licenses.
+// You may elect to redistribute this code under either of these licenses.
// ========================================================================
package org.eclipse.jetty.util;
-import java.io.IOException;
-
/* ------------------------------------------------------------ */
-/** UTF-8 StringBuffer.
+/**
+ * UTF-8 StringBuffer.
*
- * This class wraps a standard {@link java.lang.StringBuffer} and provides methods to append
+ * This class wraps a standard {@link java.lang.StringBuffer} and provides methods to append
* UTF-8 encoded bytes, that are converted into characters.
- *
- * This class is stateful and up to 6 calls to {@link #append(byte)} may be needed before
+ *
+ * This class is stateful and up to 4 calls to {@link #append(byte)} may be needed before
* state a character is appended to the string buffer.
- *
+ *
* The UTF-8 decoding is done by this class and no additional buffers or Readers are used.
- * The UTF-8 code was inspired by http://javolution.org
- *
- * This class is not synchronised and should probably be called Utf8StringBuilder
+ * The UTF-8 code was inspired by http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
*/
-public class Utf8StringBuffer extends Utf8Appendable
+public class Utf8StringBuffer extends Utf8Appendable
{
final StringBuffer _buffer;
-
+
public Utf8StringBuffer()
{
super(new StringBuffer());
- _buffer=(StringBuffer)_appendable;
+ _buffer = (StringBuffer)_appendable;
}
-
+
public Utf8StringBuffer(int capacity)
{
super(new StringBuffer(capacity));
- _buffer=(StringBuffer)_appendable;
+ _buffer = (StringBuffer)_appendable;
}
+ @Override
public int length()
{
return _buffer.length();
}
-
+
+ @Override
public void reset()
{
+ super.reset();
_buffer.setLength(0);
- _expectedContinuationBytes=0;
- _codePoint=0;
}
-
+
public StringBuffer getStringBuffer()
{
- if (_expectedContinuationBytes!=0)
- throw new NotUtf8Exception();
+ checkState();
return _buffer;
}
-
+
@Override
public String toString()
{
- if (_expectedContinuationBytes!=0)
- throw new NotUtf8Exception();
+ checkState();
return _buffer.toString();
}
+
+ private void checkState()
+ {
+ if (!isUtf8SequenceComplete())
+ throw new IllegalArgumentException("Tried to read incomplete UTF8 decoded String");
+ }
}
diff --git a/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8StringBuilder.java b/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8StringBuilder.java
index 541590f642..09866884ea 100644
--- a/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8StringBuilder.java
+++ b/jetty-util/src/main/java/org/eclipse/jetty/util/Utf8StringBuilder.java
@@ -4,70 +4,74 @@
// All rights reserved. This program and the accompanying materials
// are made available under the terms of the Eclipse Public License v1.0
// and Apache License v2.0 which accompanies this distribution.
-// The Eclipse Public License is available at
+// The Eclipse Public License is available at
// http://www.eclipse.org/legal/epl-v10.html
// The Apache License v2.0 is available at
// http://www.opensource.org/licenses/apache2.0.php
-// You may elect to redistribute this code under either of these licenses.
+// You may elect to redistribute this code under either of these licenses.
// ========================================================================
package org.eclipse.jetty.util;
-import java.io.IOException;
/* ------------------------------------------------------------ */
/** UTF-8 StringBuilder.
*
- * This class wraps a standard {@link java.lang.StringBuffer} and provides methods to append
+ * This class wraps a standard {@link java.lang.StringBuilder} and provides methods to append
* UTF-8 encoded bytes, that are converted into characters.
- *
- * This class is stateful and up to 6 calls to {@link #append(byte)} may be needed before
+ *
+ * This class is stateful and up to 4 calls to {@link #append(byte)} may be needed before
* state a character is appended to the string buffer.
- *
+ *
* The UTF-8 decoding is done by this class and no additional buffers or Readers are used.
- * The UTF-8 code was inspired by http://javolution.org
- *
+ * The UTF-8 code was inspired by http://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+ *
*/
-public class Utf8StringBuilder extends Utf8Appendable
+public class Utf8StringBuilder extends Utf8Appendable
{
final StringBuilder _buffer;
-
+
public Utf8StringBuilder()
{
super(new StringBuilder());
_buffer=(StringBuilder)_appendable;
}
-
+
public Utf8StringBuilder(int capacity)
{
super(new StringBuilder(capacity));
_buffer=(StringBuilder)_appendable;
}
-
+
+ @Override
public int length()
{
return _buffer.length();
}
-
+
+ @Override
public void reset()
{
+ super.reset();
_buffer.setLength(0);
- _expectedContinuationBytes=0;
- _codePoint=0;
}
-
+
public StringBuilder getStringBuilder()
{
- if (_expectedContinuationBytes!=0)
- throw new NotUtf8Exception();
+ checkState();
return _buffer;
}
-
+
@Override
public String toString()
{
- if (_expectedContinuationBytes!=0)
- throw new NotUtf8Exception();
+ checkState();
return _buffer.toString();
}
+
+ private void checkState()
+ {
+ if (!isUtf8SequenceComplete())
+ throw new IllegalArgumentException("Tried to read incomplete UTF8 decoded String");
+ }
}
diff --git a/jetty-util/src/test/java/org/eclipse/jetty/util/Utf8StringBufferTest.java b/jetty-util/src/test/java/org/eclipse/jetty/util/Utf8StringBufferTest.java
index 9c44625e8f..eacd85b33b 100644
--- a/jetty-util/src/test/java/org/eclipse/jetty/util/Utf8StringBufferTest.java
+++ b/jetty-util/src/test/java/org/eclipse/jetty/util/Utf8StringBufferTest.java
@@ -4,92 +4,98 @@
// All rights reserved. This program and the accompanying materials
// are made available under the terms of the Eclipse Public License v1.0
// and Apache License v2.0 which accompanies this distribution.
-// The Eclipse Public License is available at
+// The Eclipse Public License is available at
// http://www.eclipse.org/legal/epl-v10.html
// The Apache License v2.0 is available at
// http://www.opensource.org/licenses/apache2.0.php
-// You may elect to redistribute this code under either of these licenses.
+// You may elect to redistribute this code under either of these licenses.
// ========================================================================
package org.eclipse.jetty.util;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
+import java.io.UnsupportedEncodingException;
import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
public class Utf8StringBufferTest
{
- public void testUtfStringBuffer()
- throws Exception
+ @Test
+ public void testUtfStringBuffer() throws Exception
{
- String source="abcd012345\n\r\u0000\u00a4\u10fb\ufffdjetty";
+ String source = "abcd012345\n\r\u0000\u00a4\u10fb\ufffdjetty";
byte[] bytes = source.getBytes(StringUtil.__UTF8);
Utf8StringBuffer buffer = new Utf8StringBuffer();
- for (int i=0;i<bytes.length;i++)
- buffer.append(bytes[i]);
- assertEquals(source, buffer.toString());
- assertTrue(buffer.toString().endsWith("jetty"));
+ for (byte aByte : bytes)
+ buffer.append(aByte);
+ assertEquals(source,buffer.toString());
+ assertTrue(buffer.toString().endsWith("jetty"));
}
-
- @Test
- public void testShort()
- throws Exception
+ @Test(expected = IllegalArgumentException.class)
+ public void testUtf8WithMissingByte() throws Exception
{
- String source="abc\u10fb";
+ String source = "abc\u10fb";
byte[] bytes = source.getBytes(StringUtil.__UTF8);
Utf8StringBuffer buffer = new Utf8StringBuffer();
- for (int i=0;i<bytes.length-1;i++)
+ for (int i = 0; i < bytes.length - 1; i++)
buffer.append(bytes[i]);
- try
- {
- buffer.toString();
- assertTrue(false);
- }
- catch(Utf8Appendable.NotUtf8Exception e)
- {
- assertTrue(true);
- }
+ buffer.toString();
}
-
- @Test
- public void testLong()
- throws Exception
+
+ @Test(expected = Utf8Appendable.NotUtf8Exception.class)
+ public void testUtf8WithAdditionalByte() throws Exception
{
- String source="abcXX";
+ String source = "abcXX";
byte[] bytes = source.getBytes(StringUtil.__UTF8);
- bytes[3]=(byte)0xc0;
- bytes[4]=(byte)0x00;
+ bytes[3] = (byte)0xc0;
+ bytes[4] = (byte)0x00;
Utf8StringBuffer buffer = new Utf8StringBuffer();
- try
- {
- for (int i=0;i<bytes.length;i++)
- buffer.append(bytes[i]);
- assertTrue(false);
- }
- catch(Utf8Appendable.NotUtf8Exception e)
- {
- assertTrue(e.toString().indexOf("!UTF-8")>=0);
- }
- assertEquals("abc\ufffd",buffer.toString());
+ for (byte aByte : bytes)
+ buffer.append(aByte);
}
-
- @Test
- public void testUTF32codes()
- throws Exception
+
+
+ @Test
+ public void testUTF32codes() throws Exception
{
- String source="\uD842\uDF9F";
- byte[] bytes=source.getBytes("UTF-8");
-
+ String source = "\uD842\uDF9F";
+ byte[] bytes = source.getBytes("UTF-8");
+
String jvmcheck = new String(bytes,0,bytes.length,"UTF-8");
assertEquals(source,jvmcheck);
-
+
Utf8StringBuffer buffer = new Utf8StringBuffer();
buffer.append(bytes,0,bytes.length);
- String result=buffer.toString();
+ String result = buffer.toString();
assertEquals(source,result);
}
+ @Test
+ public void testGermanUmlauts() throws Exception
+ {
+ byte[] bytes = new byte[6];
+ bytes[0] = (byte)0xC3;
+ bytes[1] = (byte)0xBC;
+ bytes[2] = (byte)0xC3;
+ bytes[3] = (byte)0xB6;
+ bytes[4] = (byte)0xC3;
+ bytes[5] = (byte)0xA4;
+
+ Utf8StringBuffer buffer = new Utf8StringBuffer();
+ for (int i = 0; i < bytes.length; i++)
+ buffer.append(bytes[i]);
+
+ assertEquals("\u00FC\u00F6\u00E4",buffer.toString());
+ }
+
+ @Test(expected = Utf8Appendable.NotUtf8Exception.class)
+ public void testInvalidUTF8() throws UnsupportedEncodingException
+ {
+ Utf8StringBuffer buffer = new Utf8StringBuffer();
+ buffer.append((byte)0xC2);
+ buffer.append((byte)0xC2);
+ }
}
diff --git a/jetty-util/src/test/java/org/eclipse/jetty/util/Utf8StringBuilderTest.java b/jetty-util/src/test/java/org/eclipse/jetty/util/Utf8StringBuilderTest.java
index bfa0cccd87..b83aa1099d 100644
--- a/jetty-util/src/test/java/org/eclipse/jetty/util/Utf8StringBuilderTest.java
+++ b/jetty-util/src/test/java/org/eclipse/jetty/util/Utf8StringBuilderTest.java
@@ -4,142 +4,102 @@
// All rights reserved. This program and the accompanying materials
// are made available under the terms of the Eclipse Public License v1.0
// and Apache License v2.0 which accompanies this distribution.
-// The Eclipse Public License is available at
+// The Eclipse Public License is available at
// http://www.eclipse.org/legal/epl-v10.html
// The Apache License v2.0 is available at
// http://www.opensource.org/licenses/apache2.0.php
-// You may elect to redistribute this code under either of these licenses.
+// You may elect to redistribute this code under either of these licenses.
// ========================================================================
package org.eclipse.jetty.util;
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertTrue;
-
import org.junit.Test;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
public class Utf8StringBuilderTest
{
@Test
- public void testInvalid()
- throws Exception
+ public void testInvalid() throws Exception
{
- String[] invalids = {
- "c0af",
- "EDA080",
- "f08080af",
- "f8808080af",
- "e080af",
- "F4908080",
- "fbbfbfbfbf"
- };
-
+ String[] invalids =
+ { "c0af", "EDA080", "f08080af", "f8808080af", "e080af", "F4908080", "fbbfbfbfbf", "10FFFF" };
+
for (String i : invalids)
{
byte[] bytes = TypeUtil.fromHexString(i);
-
- /* Test what JVM does
- try
- {
- String s = new String(bytes,0,bytes.length,"UTF-8");
- System.err.println(i+": "+s);
- }
- catch(Exception e)
- {
- System.err.println(i+": "+e);
- }
- */
-
try
{
Utf8StringBuilder buffer = new Utf8StringBuilder();
buffer.append(bytes,0,bytes.length);
-
+
assertEquals(i,"not expected",buffer.toString());
}
- catch(IllegalArgumentException e)
+ catch (Utf8Appendable.NotUtf8Exception e)
{
assertTrue(i,true);
}
}
}
-
+
@Test
- public void testUtfStringBuilder()
- throws Exception
+ public void testUtfStringBuilder() throws Exception
{
- String source="abcd012345\n\r\u0000\u00a4\u10fb\ufffdjetty";
+ String source = "abcd012345\n\r\u0000\u00a4\u10fb\ufffdjetty";
byte[] bytes = source.getBytes(StringUtil.__UTF8);
Utf8StringBuilder buffer = new Utf8StringBuilder();
- for (int i=0;i<bytes.length;i++)
- buffer.append(bytes[i]);
- assertEquals(source, buffer.toString());
- assertTrue(buffer.toString().endsWith("jetty"));
+ for (byte aByte : bytes)
+ buffer.append(aByte);
+ assertEquals(source,buffer.toString());
+ assertTrue(buffer.toString().endsWith("jetty"));
}
-
-
-
- @Test
- public void testShort()
- throws Exception
+
+ @Test(expected = IllegalArgumentException.class)
+ public void testShort() throws Exception
{
- String source="abc\u10fb";
+ String source = "abc\u10fb";
byte[] bytes = source.getBytes(StringUtil.__UTF8);
Utf8StringBuilder buffer = new Utf8StringBuilder();
- for (int i=0;i<bytes.length-1;i++)
+ for (int i = 0; i < bytes.length - 1; i++)
buffer.append(bytes[i]);
- try
- {
- buffer.toString();
- assertTrue(false);
- }
- catch(Utf8Appendable.NotUtf8Exception e)
- {
- assertTrue(e.toString().indexOf("!UTF-8")>=0);
- }
+ buffer.toString();
}
-
+
@Test
- public void testLong()
- throws Exception
+ public void testLong() throws Exception
{
- String source="abcXX";
+ String source = "abcXX";
byte[] bytes = source.getBytes(StringUtil.__UTF8);
- bytes[3]=(byte)0xc0;
- bytes[4]=(byte)0x00;
+ bytes[3] = (byte)0xc0;
+ bytes[4] = (byte)0x00;
Utf8StringBuilder buffer = new Utf8StringBuilder();
try
{
- for (int i = 0; i < bytes.length; i++)
- buffer.append(bytes[i]);
+ for (byte aByte : bytes)
+ buffer.append(aByte);
assertTrue(false);
}
- catch(Utf8Appendable.NotUtf8Exception e)
+ catch (IllegalArgumentException e)
{
assertTrue(true);
}
- assertEquals("abc\ufffd", buffer.toString());
+ assertEquals("abc\ufffd",buffer.toString());
}
-
- @Test
- public void testUTF32codes()
- throws Exception
+ @Test
+ public void testUTF32codes() throws Exception
{
- String source="\uD842\uDF9F";
- byte[] bytes=source.getBytes("UTF-8");
-
- // System.err.println(TypeUtil.toHexString(bytes));
+ String source = "\uD842\uDF9F";
+ byte[] bytes = source.getBytes("UTF-8");
+
String jvmcheck = new String(bytes,0,bytes.length,"UTF-8");
assertEquals(source,jvmcheck);
-
+
Utf8StringBuilder buffer = new Utf8StringBuilder();
buffer.append(bytes,0,bytes.length);
- String result=buffer.toString();
+ String result = buffer.toString();
assertEquals(source,result);
}
-
-
}

Back to the top