Bug 307311: Incorrect string display when using non-ASCII characters

author: Mathias Kunter 2012-02-02 02:49:48 +0000
committer: Marc Khouzam 2012-03-06 14:23:21 +0000
commit: a974434ec3a84966ec2c18463276be7ff8086bff (patch)
tree: 86cd798b55f2e29338418e51a3473fc64d630efc /dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt
parent: f360d64c77c71187041a08cc97bf132b7ede88c9 (diff)
download: org.eclipse.cdt-a974434ec3a84966ec2c18463276be7ff8086bff.tar.gz
org.eclipse.cdt-a974434ec3a84966ec2c18463276be7ff8086bff.tar.xz
org.eclipse.cdt-a974434ec3a84966ec2c18463276be7ff8086bff.zip
6 files changed, 534 insertions, 110 deletions
diff --git a/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIConst.java b/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIConst.java
index 51edc599cc6..4fc3a969cef 100644
--- a/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIConst.java
+++ b/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIConst.java
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (c) 2000, 2009 QNX Software Systems and others.
+ * Copyright (c) 2000, 2012 QNX Software Systems and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
@@ -8,6 +8,9 @@
  * Contributors:
  *     QNX Software Systems - Initial API and implementation
  *     Wind River Systems   - Modified for new DSF Reference Implementation
+ *     Mathias Kunter       - Modified for implementation of Bug 307311. Please
+ *                            find further implementation details within the
+ *                            MIStringHandler class.
  *******************************************************************************/
 
 package org.eclipse.cdt.dsf.mi.service.command.output;
@@ -16,109 +19,27 @@ package org.eclipse.cdt.dsf.mi.service.command.output;
  * GDB/MI const value represents a ios-c string.
  */
 public class MIConst extends MIValue {
-    String cstring = ""; //$NON-NLS-1$
-
+    
+    private String cstring = ""; //$NON-NLS-1$
+    
     public String getCString() {
         return cstring;
     }
-
+    
     public void setCString(String str) {
         cstring = str;
     }
-
+    
     /**
-     * Translate gdb c-string.
+     * Translates the C string value into a string which is suitable for display to a human.
+     * @return The translated string.
      */
     public String getString() {
-        return getString(cstring);
-    }
-
-    public static String getString(String str) {
-        StringBuffer buffer = new StringBuffer();
-        boolean escape = false;
-        for (int i = 0; i < str.length(); i++) {
-            char c = str.charAt(i);
-            if (c == '\\') {
-                if (escape) {
-                    buffer.append(c);
-                    escape = false;
-                } else {
-                    escape = true;
-                }
-            } else {
-                if (escape) {
-                    if (isIsoCSpecialChar(c)) {
-                        buffer.append(isoC(c));
-                    } else {
-                        buffer.append('\\');
-                        buffer.append(c);
-                    }
-                } else {
-                    buffer.append(c);
-                }
-                escape = false;
-            }
-        }
-
-        // If escape is still true it means that the
-        // last char was an '\'.
-        if (escape) {
-            buffer.append('\\');
-        }
-
-        return buffer.toString();
+        return MIStringHandler.translateCString(cstring, true);
     }
-
+    
     @Override
     public String toString() {
         return getCString();
     }
-
-    /**
-     * Assuming that the precedent character was the
-     * escape sequence '\'
-     */
-    private static String isoC(char c) {
-        String s = new Character(c).toString();
-        if (c == '"') {
-            s = "\""; //$NON-NLS-1$
-        } else if (c == '\'') {
-            s = "\'"; //$NON-NLS-1$
-        } else if (c == '?') {
-            s = "?"; //$NON-NLS-1$
-        } else if (c == 'a') {
-            s = "\007"; //$NON-NLS-1$
-        } else if (c == 'b') {
-            s = "\b"; //$NON-NLS-1$
-        } else if (c == 'f') {
-            s = "\f"; //$NON-NLS-1$
-        } else if (c == 'n') {
-            s = System.getProperty("line.separator", "\n"); //$NON-NLS-1$ //$NON-NLS-2$ $NON-NLS-2$
-        } else if (c == 'r') {
-            s = "\r"; //$NON-NLS-1$
-        } else if (c == 't') {
-            s = "\t"; //$NON-NLS-1$
-        } else if (c == 'v') {
-            s = "\013"; //$NON-NLS-1$
-        }
-        return s;
-    }
-
-    private static boolean isIsoCSpecialChar(char c) {
-        switch (c) {
-            case '"':
-            case '\'':
-            case '?':
-            case 'a':
-            case 'b':
-            case 'f':
-            case 'n':
-            case 'r':
-            case 't':
-            case 'v':
-                return true;
-        }
-        return false;
-        
-    }
 }
diff --git a/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIDataEvaluateExpressionInfo.java b/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIDataEvaluateExpressionInfo.java
index aaade8f8ac1..1fae2a65119 100644
--- a/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIDataEvaluateExpressionInfo.java
+++ b/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIDataEvaluateExpressionInfo.java
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (c) 2000, 2009 QNX Software Systems and others.
+ * Copyright (c) 2000, 2012 QNX Software Systems and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
@@ -8,6 +8,7 @@
  * Contributors:
  *     QNX Software Systems - Initial API and implementation
  *     Wind River Systems   - Modified for new DSF Reference Implementation
+ *     Mathias Kunter       - use MIConst.getString which is for human consumption (Bug 307311)
  *******************************************************************************/
 
 package org.eclipse.cdt.dsf.mi.service.command.output;
@@ -32,7 +33,7 @@ public class MIDataEvaluateExpressionInfo extends MIInfo {
                     if (var.equals("value")) { //$NON-NLS-1$
                         MIValue value = results[i].getMIValue();
                         if (value instanceof MIConst) {
-                            fValue = ((MIConst)value).getCString();
+                            fValue = ((MIConst)value).getString();
                         }
                     }
                 }
diff --git a/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIParser.java b/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIParser.java
index ccc809369a1..65a5b94ad32 100644
--- a/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIParser.java
+++ b/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIParser.java
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (c) 2000, 2009 QNX Software Systems and others.
+ * Copyright (c) 2000, 2012 QNX Software Systems and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
@@ -8,6 +8,7 @@
  * Contributors:
  *     QNX Software Systems - Initial API and implementation
  *     Wind River Systems   - Modified for new DSF Reference Implementation
+ *     Mathias Kunter       - Don't always parse backslashes (Bug 367456, Bug 307311)
  *******************************************************************************/
 
 package org.eclipse.cdt.dsf.mi.service.command.output;
@@ -216,7 +217,9 @@ public class MIParser {
             if (buffer.length() > 0 && buffer.charAt(0) == '"') {
                 buffer.deleteCharAt(0);
             }
-            stream.setCString(translateCString(new FSB(buffer)));
+            // Don't parse any backslashes - backslashes within stream records
+            // aren't escaped.
+            stream.setCString(translateCString(new FSB(buffer), false));
             oob = stream;
         } else {
             // Badly format MI line, just pass it to the user as target stream
@@ -306,7 +309,9 @@ public class MIParser {
             } else if (buffer.charAt(0) == '"') {
                 buffer.deleteCharAt(0);
                 MIConst cnst = new MIConst();
-                cnst.setCString(translateCString(buffer));
+                // Parse backslashes - backslashes within result
+                // and out of band records are escaped.
+                cnst.setCString(translateCString(buffer, true));
                 value = cnst;
             }
         }
@@ -382,15 +387,20 @@ public class MIParser {
         return list;
     }
 
-    /*
-     * MI C-String rather MICOnst values are enclose in double quotes
-     * and any double quotes or backslash in the string are escaped.
-     * Assuming the starting double quote was removed.
-     * This method will stop at the closing double quote remove the extra
-     * backslach escaping and return the string __without__ the enclosing double quotes
-     * The orignal StringBuffer will move forward.
+    /**
+     * MI C-String rather MIConst values are enclosed in double quotes
+     * and any double quotes or backslashes in the string are escaped.
+     * Assuming the starting double quote was removed. This method will
+     * stop at the closing double quote, remove the extra backslash escaping
+     * and return the string __without__ the enclosing double quotes. The
+     * original string buffer will move forward.
+     * @param buffer The string buffer to read from.
+     * @param parseBackslashes Defines whether backslashes should be parsed.
+     * This parameter is necessary to differentiate between records which
+     * contain escaped backslashes and records which do not.
+     * @return The translated C string.
      */
-    private String translateCString(FSB buffer) {
+    private String translateCString(FSB buffer, boolean parseBackslashes) {
         boolean escape = false;
         boolean closingQuotes = false;
 
@@ -402,7 +412,9 @@ public class MIParser {
             if (c == '\\') {
                 if (escape) {
                     sb.append(c);
-                    sb.append(c);
+                    if (!parseBackslashes) {
+                        sb.append(c);
+                    }
                     escape = false;
                 } else {
                     escape = true;
diff --git a/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIStreamRecord.java b/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIStreamRecord.java
index 86024bcf048..2252bb3c66f 100644
--- a/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIStreamRecord.java
+++ b/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIStreamRecord.java
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (c) 2000, 2009 QNX Software Systems and others.
+ * Copyright (c) 2000, 2012 QNX Software Systems and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
@@ -8,6 +8,7 @@
  * Contributors:
  *     QNX Software Systems - Initial API and implementation
  *     Wind River Systems   - Modified for new DSF Reference Implementation
+ *     Mathias Kunter       - Modified to use the new MIStringHandler class (Bug 307311)
  *******************************************************************************/
 
 package org.eclipse.cdt.dsf.mi.service.command.output;
@@ -28,7 +29,8 @@ public abstract class MIStreamRecord extends MIOOBRecord {
     } 
 
     public String getString () {
-        return MIConst.getString(getCString());
+        // Return the translated C string without escaping any special characters.
+        return MIStringHandler.translateCString(getCString(), false);
     }
 
     @Override
diff --git a/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIStringHandler.java b/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIStringHandler.java
new file mode 100644
index 00000000000..a5a4a770583
--- /dev/null
+++ b/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIStringHandler.java
@@ -0,0 +1,487 @@
+/*******************************************************************************
+ * Copyright (c) 2012 Mathias Kunter and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ * 
+ * Contributors:
+ *     Mathias Kunter       - Initial Implementation (Bug 307311)
+ *******************************************************************************/
+package org.eclipse.cdt.dsf.mi.service.command.output;
+
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CharsetEncoder;
+import java.text.ParseException;
+import java.util.EnumSet;
+
+/**
+ * The MIStringHandler class provides several static functions to handle C and / or MI strings.
+ * @since 4.1
+ */
+public class MIStringHandler {
+    
+    /**
+     * Defines special characters which are used within escape notations to represent a
+     * corresponding Unicode code point (i.e. character code). See the specialCodePoints
+     * list below on the same index for the corresponding code point.
+     */
+    private static char[] specialChars = new char[] {
+        'a',    // Alert (bell) character
+        'b',    // Backspace character
+        'e',    // GNU extension: Escape character
+        'E',    // same as 'e'
+        'f',    // Form feed character
+        'n',    // New line character
+        'r',    // Carriage return character
+        't',    // Horizontal tabulation character
+        'v',    // Vertical tabulation character
+        '\'',   // Single quotation mark
+        '"',    // Double quotation mark
+        '\\',   // Backslash
+        '?'     // Literal question mark
+    };
+    
+    /**
+     * Defines special Unicode code points (i.e. character codes) which can be escaped by a
+     * corresponding special character. See the specialChars list above on the same index
+     * for the corresponding special character.
+     */
+    private static int[] specialCodePoints = new int[] {
+        0x07,    // corresponds to \a
+        0x08,    // corresponds to \b
+        0x1B,    // corresponds to \e
+        0x1B,    // corresponds to \E
+        0x0C,    // corresponds to \f
+        0x0A,    // corresponds to \n
+        0x0D,    // corresponds to \r
+        0x09,    // corresponds to \t
+        0x0B,    // corresponds to \v
+        0x27,    // corresponds to \'
+        0x22,    // corresponds to \"
+        0x5C,    // corresponds to \\
+        0x3F     // corresponds to \?
+    };
+    
+    /**
+     * An internal helper enumeration which holds the current status while parsing an escaped
+     * text sequence.
+     */
+    private enum EscapeStatus {
+        NONE,
+        BEGIN,
+        OCTAL_NUMBER,
+        HEX_NUMBER,
+        UNICODE_SHORT_NUMBER,
+        UNICODE_LONG_NUMBER,
+        VALID,
+        INVALID
+    }
+    
+    /**
+     * An enumeration defining the escape sequences which should be parsed.
+     */
+    public enum ParseFlags {
+        SPECIAL_CHARS,
+        OCTAL_NUMBERS,
+        HEX_NUMBERS,
+        UNICODE_SHORT_NUMBERS,
+        UNICODE_LONG_NUMBERS
+    }
+    
+    /**
+     * Translates the given C string into a string suitable for display. This includes handling
+     * of escaped characters and different string encodings. This is necessary in order to correctly
+     * deal with non-ASCII strings.
+     * @param str The C string to translate.
+     * @param escapeChars Defines whether non-printable characters should be escaped within
+     * the translated string, or not.
+     * @return The translated string.
+     */
+    public static String translateCString(String str, boolean escapeChars) {
+        if (escapeChars) {
+            // Don't parse the special character escape notations here. We can do this here because
+            // we want to keep them in their escaped form anyway, and because the following string
+            // transcoding process isn't affected by escaped special chars. By doing so we avoid
+            // caring about some nasty details of the special character escaping process: for
+            // example, single quotation marks are commonly only escaped within character constants,
+            // while double quotation marks are commonly only escaped within string constants. By
+            // not parsing the special character escape notations at all here, we just keep the
+            // original special character escaping provided by the given MI string.
+            str = parseString(str, EnumSet.complementOf(EnumSet.of(ParseFlags.SPECIAL_CHARS)));
+        } else {
+            // Parse all escaped characters.
+            str = parseString(str);
+        }
+        
+        // Transcode the string in order to handle non-ASCII strings correctly.
+        str = transcodeString(str);
+        
+        if (escapeChars) {
+            // Escape any non-printable characters again, as we want to be able to display them.
+            // However, don't escape any printable special chars, as they haven't been parsed before.
+            str = escapeString(str, false);
+        } else {
+            // No escaping necessary here. We however have to make sure that we use the correct line
+            // separation character sequence.
+            str = str.replace("\n", System.getProperty("line.separator", "\n")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
+        }
+        
+        return str;
+    }
+    
+    /**
+     * Returns whether the given character is a special character, or not.
+     * @param c The character to test.
+     * @return The test result.
+     */
+    public static boolean isSpecialChar(char c) {
+        for (int i = 0; i < specialChars.length; i++) {
+            if (specialChars[i] == c) {
+                return true;
+            }
+        }
+        return false;
+    }
+    
+    /**
+     * Returns whether the given Unicode code point is a special code point, or not.
+     * @param codePoint The Unicode code point to test.
+     * @return The test result.
+     */
+    public static boolean isSpecialCodePoint(int codePoint) {
+        for (int i = 0; i < specialCodePoints.length; i++) {
+            if (specialCodePoints[i] == codePoint) {
+                return true;
+            }
+        }
+        return false;
+    }
+    
+    /**
+     * Parses the given special character into an Unicode code point.
+     * @param c The special character to parse.
+     * @return The parsed Unicode code point.
+     * @throws ParseException Thrown when the given character can't be parsed. This happens when it's
+     * not a special character.
+     */
+    public static int parseSpecialChar(char c) throws ParseException {
+        for (int i = 0; i < specialChars.length; i++) {
+            if (specialChars[i] == c) {
+                return specialCodePoints[i];
+            }
+        }
+        throw new ParseException("The given character '" + c + "' is not a special character.", 0); //$NON-NLS-1$ //$NON-NLS-2$
+    }
+    
+    /**
+     * Parses the given special Unicode code point into a character.
+     * @param codePoint The special Unicode code point to parse.
+     * @return The parsed character.
+     * @throws ParseException Thrown when the given Unicode code point can't be parsed. This happens
+     * when it's not a special code point.
+     */
+    public static char parseSpecialCodePoint(int codePoint) throws ParseException {
+        for (int i = 0; i < specialCodePoints.length; i++) {
+            if (specialCodePoints[i] == codePoint) {
+                return specialChars[i];
+            }
+        }
+        throw new ParseException("The given Unicode code point " + codePoint + " is not a special code point.", 0); //$NON-NLS-1$ //$NON-NLS-2$
+    }
+    
+    /**
+     * This is an overloaded function. See the Javadoc of the other function overload for details.
+     * @param str The string which should be parsed.
+     * @return The parsed string.
+     */
+    public static String parseString(String str) {
+        return parseString(str, EnumSet.allOf(ParseFlags.class));
+    }
+
+    /**
+     * Parses any escaped characters and replaces them with their corresponding Unicode code points.
+     * This function parses all escape notations which are supported by gcc and / or gdb. Those are:</br></br>
+     * 
+     * <ul>
+     * <li>Special char escape notations: \a, \b, \e, \E, \f, \n, \r, \t, \v, \', \", \\, and \?</li>
+     * 
+     * <li>Octal escape notation: An initial backslash, followed by 1, 2, or 3 octal digits. Values
+     * above 0xFF are ignored. Octal escape notations may not use more than 3 octal digits.</li>
+     * 
+     * <li>Hexadecimal escape notation: An initial backslash, followed by an "x" and 1 or more
+     * hexadecimal digits. Hexadecimal escape notations may not use more than 4 hexadecimal digits
+     * (although gcc accepts hexadecimal escape notations of any arbitrary length).</li>
+     * 
+     * <li>Short Unicode escape notation: An initial backslash, followed by an "u" and exactly 4
+     * hexadecimal digits.</li>
+     * 
+     * <li>Long Unicode escape notation: An initial backslash, followed by an "U" and exactly 8
+     * hexadecimal digits.</li>
+     * </ul>
+     * @param str The string which should be parsed.
+     * @param parseFlags The set of escape notations which should be parsed.
+     * @return The parsed string.
+     */
+    public static String parseString(String str, EnumSet<ParseFlags> parseFlags) {
+        StringBuffer buffer = new StringBuffer();
+        StringBuffer escapeBuffer = new StringBuffer();
+        EscapeStatus escStatus = EscapeStatus.NONE;
+        
+        for (int i = 0; i < str.length(); i++) {
+            char c = str.charAt(i);
+            boolean consumeChar = true;
+            boolean isLastChar = i == str.length() - 1;
+            
+            if (escStatus == EscapeStatus.NONE) {
+                if (c == '\\') {
+                    // Escaping begins. Reset the escape buffer.
+                    escapeBuffer.setLength(0);
+                    escapeBuffer.append(c);
+                    escStatus = EscapeStatus.BEGIN;
+                }
+            } else if (escStatus == EscapeStatus.BEGIN) {
+                if (parseFlags.contains(ParseFlags.SPECIAL_CHARS) && isSpecialChar(c)) {
+                    try {
+                        buffer.appendCodePoint(parseSpecialChar(c));
+                        escStatus = EscapeStatus.VALID;
+                    } catch (ParseException e) {
+                        // This is just for completeness. We will actually never catch any ParseException here
+                        // since we already checked the character with isSpecialChar() before.
+                        escapeBuffer.append(c);
+                        escStatus = EscapeStatus.INVALID;
+                    }
+                } else if (parseFlags.contains(ParseFlags.OCTAL_NUMBERS) && c >= '0' && c <= '7') {
+                    escStatus = EscapeStatus.OCTAL_NUMBER;
+                    // Don't consume this character right now - as this wouldn't work if it's the last character.
+                    consumeChar = false;
+                } else if (parseFlags.contains(ParseFlags.HEX_NUMBERS) && c == 'x') {
+                    escStatus = EscapeStatus.HEX_NUMBER;
+                } else if (parseFlags.contains(ParseFlags.UNICODE_SHORT_NUMBERS) && c == 'u') {
+                    escStatus = EscapeStatus.UNICODE_SHORT_NUMBER;
+                } else if (parseFlags.contains(ParseFlags.UNICODE_LONG_NUMBERS) && c == 'U') {
+                    escStatus = EscapeStatus.UNICODE_LONG_NUMBER;
+                } else {
+                    escStatus = EscapeStatus.INVALID;
+                }
+                if (consumeChar) {
+                    escapeBuffer.append(c);
+                }
+            } else if (escStatus == EscapeStatus.HEX_NUMBER) {
+                // Only consume this character if it belongs to the escape sequence.
+                consumeChar = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+                if (consumeChar) {
+                    escapeBuffer.append(c);
+                }
+                
+                if (!consumeChar || isLastChar || escapeBuffer.length() == 6) {
+                    // The escape sequence is terminated. Set the escape status to invalid until
+                    // we know that it's actually valid.
+                    escStatus = EscapeStatus.INVALID;
+                    if (escapeBuffer.length() > 2) {
+                        // Decode the hexadecimal number.
+                        try {
+                            int codePoint = Integer.parseInt(escapeBuffer.toString().substring(2), 16);
+                            if (codePoint <= 0x10FFFF) {
+                                buffer.appendCodePoint(codePoint);
+                                escStatus = EscapeStatus.VALID;
+                            }
+                        } catch (NumberFormatException e) {
+                        }
+                    }
+                }
+            } else if (escStatus == EscapeStatus.UNICODE_SHORT_NUMBER || escStatus == EscapeStatus.UNICODE_LONG_NUMBER) {
+                // Only consume this character if it belongs to the escape sequence.
+                consumeChar = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
+                if (consumeChar) {
+                    escapeBuffer.append(c);
+                }
+                
+                int finalLength = escStatus == EscapeStatus.UNICODE_SHORT_NUMBER ? 6 : 10;
+                if (escapeBuffer.length() == finalLength) {
+                    // The escape sequence is terminated. Set the escape status to invalid until
+                    // we know that it's actually valid. Decode the hexadecimal number.
+                    escStatus = EscapeStatus.INVALID;
+                    try {
+                        int codePoint = Integer.parseInt(escapeBuffer.toString().substring(2), 16);
+                        if (codePoint <= 0x10FFFF) {
+                            buffer.appendCodePoint(codePoint);
+                            escStatus = EscapeStatus.VALID;
+                        }
+                    } catch (NumberFormatException e) {
+                    }
+                } else if (!consumeChar || isLastChar) {
+                    // The escape sequence is terminated and invalid.
+                    escStatus = EscapeStatus.INVALID;
+                }
+            } else if (escStatus == EscapeStatus.OCTAL_NUMBER) {
+                // Only consume this character if it belongs to the escape sequence.
+                consumeChar = c >= '0' && c <= '7';
+                if (consumeChar) {
+                    escapeBuffer.append(c);
+                }
+                
+                if (!consumeChar || isLastChar || escapeBuffer.length() == 4) {
+                    // The escape sequence is terminated. Set the escape status to invalid until
+                    // we know that it's actually valid.
+                    escStatus = EscapeStatus.INVALID;
+                    if (escapeBuffer.length() > 1) {
+                        // Decode the octal number.
+                        try {
+                            int codePoint = Integer.parseInt(escapeBuffer.toString().substring(1), 8);
+                            if (codePoint <= 0xFF) {
+                                buffer.appendCodePoint(codePoint);
+                                escStatus = EscapeStatus.VALID;
+                            }
+                        } catch (NumberFormatException e) {
+                        }
+                    }
+                }
+            }
+            
+            if (escStatus == EscapeStatus.NONE) {
+                // Current character isn't escaped - copy it over to the destination buffer.
+                buffer.append(c);
+            } else if (escStatus == EscapeStatus.VALID) {
+                escStatus = EscapeStatus.NONE;
+            } else if (escStatus == EscapeStatus.INVALID) {
+                buffer.append(escapeBuffer);
+                escStatus = EscapeStatus.NONE;
+            }
+            
+            if (!consumeChar) {
+                // Don't consume the current character.
+                i--;
+            }
+        }
+        
+        // Check for non-finished escape sequences at the end of the string.
+        if (escStatus != EscapeStatus.NONE) {
+            buffer.append(escapeBuffer);
+        }
+        
+        // Convert the buffer into a string and return it.
+        return buffer.toString();
+    }
+    
+    /**
+     * Transcodes the given string. This is done as follows:</br></br>
+     * 1) The given string is encoded into a binary byte buffer.</br></br>
+     * 2) It's tested whether this binary byte buffer seems to represent a string which is encoded as
+     * either ASCII, Latin-1, or UTF-8. If this is the case, the binary byte buffer is decoded back into
+     * a string and this string is returned. If the test is negative, the given string is returned without
+     * modification because its encoding can't be reliably determined in this case.
+     * The most important use case of this function is to transcode a string which is actually UTF-8 but has
+     * been incorrectly decoded as Latin-1 instead.
+     * @param str The string to transcode.
+     * @return The transcoded string.
+     */
+    public static String transcodeString(String str) {
+        // Try to transcode the string from Latin-1 to UTF-8 (ASCII doesn't need to be explicitly
+        // considered here since Latin-1 is backwards compatible with ASCII). The transcoding will
+        // almost certainly only succeed if the string actually *is* encoded in UTF-8. If the
+        // transcoding fails, the string is simply left unchanged.
+        try {
+            // First, try to encode the string as Latin-1 in order to obtain the binary byte
+            // representation of the string.
+            CharsetEncoder latin1Encoder = Charset.forName("ISO-8859-1").newEncoder(); //$NON-NLS-1$
+            ByteBuffer stringBytes = latin1Encoder.encode(CharBuffer.wrap(str.toCharArray()));
+            
+            // Next, try to decode the string as UTF-8. This will almost certainly only succeed
+            // if the string actually *is* encoded in UTF-8. Note that if the decoding fails,
+            // an exception is thrown before the str variable is assigned. The original string
+            // is therefore left unchanged in this case.
+            CharsetDecoder utf8Decoder = Charset.forName("UTF-8").newDecoder(); //$NON-NLS-1$
+            str = utf8Decoder.decode(stringBytes).toString();
+        } catch (Exception e) {
+        }
+        
+        return str;
+    }
+    
+    /**
+     * Escapes any non-printable characters as well as the printable special characters single quotation
+     * mark, double quotation mark, backslash, and literal question mark within the given string. Supports
+     * the entire Unicode code space.
+     * @param str The string which should be escaped.
+     * @return The escaped string.
+     */
+    public static String escapeString(String str) {
+        return escapeString(str, true);
+    }
+    
+    /**
+     * Escapes any non-printable characters within the given string. Supports the entire Unicode code space.
+     * @param str The string which should be escaped.
+     * @param escapePrintableSpecialChars Defines whether the printable special characters single
+     * quotation mark, double quotation mark, backslash, and literal question mark should be
+     * escaped as well, or not.
+     * @return The escaped string.
+     */
+    public static String escapeString(String str, boolean escapePrintableSpecialChars) {
+        StringBuffer buffer = new StringBuffer();
+        
+        for (int i = 0; i < str.length(); i++) {
+            // Get the current character code point. Note that using the Java "char" data type isn't
+            // sufficient here, as it can't handle all Unicode characters.
+            int codePoint = str.codePointAt(i);
+            if (Character.isSupplementaryCodePoint(codePoint)) {
+                i++;
+            }
+            
+            // Check the code point type of the character in order to determine whether it's
+            // printable or not.
+            int codePointType = Character.getType(codePoint);
+            switch (codePointType) {
+                case Character.LINE_SEPARATOR:
+                case Character.PARAGRAPH_SEPARATOR:
+                case Character.CONTROL:
+                case Character.PRIVATE_USE:
+                case Character.SURROGATE:
+                case Character.UNASSIGNED:
+                    // Non-printable character.
+                    if (isSpecialCodePoint(codePoint)) {
+                        // Escape by using the special character escape notation.
+                        buffer.append('\\');
+                        try {
+                            buffer.append(parseSpecialCodePoint(codePoint));
+                        } catch (ParseException e) {
+                            buffer.appendCodePoint(codePoint);
+                        }
+                    } else if (codePoint == 0x00) {
+                        // Escape the null character separately - don't use leading zeros.
+                        buffer.append("\\0"); //$NON-NLS-1$
+                    } else if (codePoint <= 0xFF) {
+                        // Escape by using the octal escape notation.
+                        buffer.append(String.format("\\%03o", codePoint)); //$NON-NLS-1$
+                    } else if (codePoint <= 0xFFFF) {
+                        // Escape by using the short Unicode escape notation.
+                        buffer.append(String.format("\\u%04x", codePoint)); //$NON-NLS-1$
+                    } else {
+                        // Escape by using the long Unicode escape notation.
+                        buffer.append(String.format("\\U%08x", codePoint)); //$NON-NLS-1$
+                    }
+                    break;
+                default:
+                    // Printable character.
+                    if (escapePrintableSpecialChars && isSpecialCodePoint(codePoint)) {
+                        // Escape by using the special character escape notation.
+                        buffer.append('\\');
+                        try {
+                            buffer.append(parseSpecialCodePoint(codePoint));
+                        } catch (ParseException e) {
+                            buffer.appendCodePoint(codePoint);
+                        }
+                    } else {
+                        // Don't escape.
+                        buffer.appendCodePoint(codePoint);
+                    }
+            }
+        }
+        
+        return buffer.toString();
+    }
+}
diff --git a/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIVarEvaluateExpressionInfo.java b/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIVarEvaluateExpressionInfo.java
index 2541fda5fbc..0a73b5aca2c 100644
--- a/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIVarEvaluateExpressionInfo.java
+++ b/dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt/dsf/mi/service/command/output/MIVarEvaluateExpressionInfo.java
@@ -1,5 +1,5 @@
 /*******************************************************************************
- * Copyright (c) 2000, 2009 QNX Software Systems and others.
+ * Copyright (c) 2000, 2012 QNX Software Systems and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
@@ -8,6 +8,7 @@
  * Contributors:
  *     QNX Software Systems - Initial API and implementation
  *     Wind River Systems   - Modified for new DSF Reference Implementation
+ *     Mathias Kunter       - use MIConst.getString which is for human consumption (Bug 307311)
  *******************************************************************************/
 package org.eclipse.cdt.dsf.mi.service.command.output;
 
@@ -30,7 +31,7 @@ public class MIVarEvaluateExpressionInfo extends MIInfo {
     				if (var.equals("value")) { //$NON-NLS-1$
     					MIValue val = results[i].getMIValue();
     					if (val instanceof MIConst) {
-    						value = ((MIConst)val).getCString();
+    						value = ((MIConst)val).getString();
     					}
     				}
     			}
author	Mathias Kunter	2012-02-02 02:49:48 +0000
committer	Marc Khouzam	2012-03-06 14:23:21 +0000
commit	a974434ec3a84966ec2c18463276be7ff8086bff (patch)
tree	86cd798b55f2e29338418e51a3473fc64d630efc /dsf-gdb/org.eclipse.cdt.dsf.gdb/src/org/eclipse/cdt
parent	f360d64c77c71187041a08cc97bf132b7ede88c9 (diff)
download	org.eclipse.cdt-a974434ec3a84966ec2c18463276be7ff8086bff.tar.gz org.eclipse.cdt-a974434ec3a84966ec2c18463276be7ff8086bff.tar.xz org.eclipse.cdt-a974434ec3a84966ec2c18463276be7ff8086bff.zip