diff options
| author | Joerg Kubitz | 2021-03-28 17:27:31 +0000 |
|---|---|---|
| committer | Manoj Palat | 2021-06-17 13:38:29 +0000 |
| commit | 680081fceaccc04731bfd58151734b054356a6d1 (patch) | |
| tree | 8c79fc16b233d2d3f19531bc0f88e7958b86d09c | |
| parent | dcdb41213d2a0c255037b790a64514a84fade08e (diff) | |
| download | eclipse.jdt.core-680081fceaccc04731bfd58151734b054356a6d1.tar.gz eclipse.jdt.core-680081fceaccc04731bfd58151734b054356a6d1.tar.xz eclipse.jdt.core-680081fceaccc04731bfd58151734b054356a6d1.zip | |
Bug 572372 - [performance] improve getInputStreamAsCharArray I20210617-1800
Streamless implementation. It's ~ 2x faster for small files.
getInputStreamAsCharArray is a hotspot during
CompilationUnit.getContents().
Change-Id: I7daf60e52cbc34997ba24164285f769f604cc515
Signed-off-by: Joerg Kubitz <jkubitz-eclipse@gmx.de>
Reviewed-on: https://git.eclipse.org/r/c/jdt/eclipse.jdt.core/+/178470
Tested-by: JDT Bot <jdt-bot@eclipse.org>
Reviewed-by: Manoj Palat <manpalat@in.ibm.com>
| -rw-r--r-- | org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/util/Util.java | 97 |
1 files changed, 53 insertions, 44 deletions
diff --git a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/util/Util.java b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/util/Util.java index 3d50aca7f5..f7ab82b7d5 100644 --- a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/util/Util.java +++ b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/util/Util.java @@ -16,21 +16,23 @@ package org.eclipse.jdt.internal.compiler.util; import java.io.BufferedInputStream; import java.io.BufferedOutputStream; -import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import java.io.PrintWriter; import java.io.StringWriter; -import java.io.UnsupportedEncodingException; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.Charset; import java.util.ArrayList; import java.util.Arrays; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.StringTokenizer; import java.util.zip.ZipEntry; @@ -547,6 +549,14 @@ public class Util implements SuffixConstants { return (byteBuf.length == byteCount) ? byteBuf : Arrays.copyOf(byteBuf, byteCount); } + private static Map<String, byte[]> bomByEncoding = new HashMap<String, byte[]>(); + static { + // org.eclipse.core.runtime.content.IContentDescription.BOM_UTF_8: + bomByEncoding.put("UTF-8", new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF }); //$NON-NLS-1$ + // XXX UTF-16, UTF-32 may have BOM too + // @see org.eclipse.core.runtime.content.IContentDescription.BOM_UTF_16BE ,.. + } + /** * Returns the given input stream's contents as a character array. * Note this doesn't close the stream. 
@@ -554,55 +564,54 @@ public class Util implements SuffixConstants { */ public static char[] getInputStreamAsCharArray(InputStream stream, String encoding) throws IOException { - //XXX java.nio.file.Files.readString().toCharArray() is faster on recent JDKs - BufferedReader reader = null; + byte[] byteContents = getInputStreamAsByteArray(stream); + + Charset charset; try { - reader = encoding == null - ? new BufferedReader(new InputStreamReader(stream)) - : new BufferedReader(new InputStreamReader(stream, encoding)); - } catch (UnsupportedEncodingException e) { + charset = Charset.forName(encoding); + } catch (IllegalArgumentException e) { // encoding is not supported - reader = new BufferedReader(new InputStreamReader(stream)); + charset = Charset.defaultCharset(); } - char[] contents = CharOperation.NO_CHAR; - int totalRead = 0; - while (true) { - int amountRequested; - { - // reading beyond known length - int current = reader.read(); - if (current < 0) break; + // check for BOM in encoded byte content + // (instead of after decoding to avoid array copy after decoding): + byte[] bom = bomByEncoding.get(charset.name()); + int start; + if (bom != null && startsWith(byteContents, bom)) { + start = bom.length; // skip BOM + } else { + start = 0; + } - amountRequested = Math.max(stream.available(), DEFAULT_READING_SIZE); // read at least 8K + return decode(byteContents, start, byteContents.length - start, charset); + } - // resize contents if needed - if (totalRead + 1 + amountRequested > contents.length) - System.arraycopy(contents, 0, contents = new char[totalRead + 1 + amountRequested], 0, totalRead); + /** + * conversionless inmplementation of + * + * @return new String(srcBytes, start, length, charset).toCharArray(); + **/ + private static char[] decode(byte[] srcBytes, int start, int length, Charset charset) { + ByteBuffer srcBuffer = ByteBuffer.wrap(srcBytes, start, length); + CharBuffer destBuffer = charset.decode(srcBuffer); + char[] dst = destBuffer.array(); 
+ int chars = destBuffer.remaining(); + if (chars != dst.length) { + dst = Arrays.copyOf(dst, chars); + } + return dst; + } - // add current character - contents[totalRead++] = (char) current; // coming from totalRead==length - } - // read as many chars as possible - int amountRead = reader.read(contents, totalRead, amountRequested); - if (amountRead < 0) break; - totalRead += amountRead; - } - - // Do not keep first character for UTF-8 BOM encoding - int start = 0; - if (totalRead > 0 && UTF_8.equals(encoding)) { - if (contents[0] == 0xFEFF) { // if BOM char then skip - totalRead--; - start = 1; - } + private static boolean startsWith(byte[] a, byte[] start) { + if (a.length < start.length) { + return false; } - - // resize contents if necessary - if (totalRead < contents.length) - System.arraycopy(contents, start, contents = new char[totalRead], 0, totalRead); - - return contents; + for (int i = 0; i < start.length; i++) { + if (a[i] != start[i]) + return false; + } + return true; } /** |
