diff options
author | Srikanth | 2012-07-14 07:43:31 +0000 |
---|---|---|
committer | Srikanth | 2012-07-14 07:43:31 +0000 |
commit | e0956e75ff6cb3066016adc9dae90f9c1534dda4 (patch) | |
tree | 8b95f618d5e1dae1482a7f9a4f74d84c1581a73a | |
parent | 4c2900096a422605a0884d5ca39839064ce0c89a (diff) | |
download | eclipse.jdt.core-e0956e75ff6cb3066016adc9dae90f9c1534dda4.tar.gz eclipse.jdt.core-e0956e75ff6cb3066016adc9dae90f9c1534dda4.tar.xz eclipse.jdt.core-e0956e75ff6cb3066016adc9dae90f9c1534dda4.zip |
Rework of lookahead strategy
5 files changed, 146 insertions, 53 deletions
diff --git a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/ConflictedParser.java b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/ConflictedParser.java new file mode 100644 index 0000000000..a795f04a0d --- /dev/null +++ b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/ConflictedParser.java @@ -0,0 +1,23 @@ +/******************************************************************************* + * Copyright (c) 2000, 2012 IBM Corporation and others. + * All rights reserved. This program and the accompanying materials + * are made available under the terms of the Eclipse Public License v1.0 + * which accompanies this distribution, and is available at + * http://www.eclipse.org/legal/epl-v10.html + * + * This is an implementation of an early-draft specification developed under the Java + * Community Process (JCP) and is made available for testing and evaluation purposes + * only. The code is not compatible with any specification of the JCP. + * + * Contributors: + * IBM Corporation - initial API and implementation + *******************************************************************************/ +package org.eclipse.jdt.internal.compiler.parser; + +public interface ConflictedParser { + + /* Return true if at the configuration the parser finds itself in, token would need to be disambiguated. + At Java SE 8 time, we have two tokens that need to clarified: the use of '( and that of '<' + */ + boolean atConflictScenario(int token); +} diff --git a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/Parser.java b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/Parser.java index 9075565d1b..b437cbecab 100644 --- a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/Parser.java +++ b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/Parser.java @@ -145,7 +145,7 @@ import org.eclipse.jdt.internal.compiler.problem.ProblemSeverities; import org.eclipse.jdt.internal.compiler.util.Messages; import org.eclipse.jdt.internal.compiler.util.Util; -public class Parser implements ParserBasicInformation, TerminalTokens, OperatorIds, TypeIds { +public class Parser implements ConflictedParser, ParserBasicInformation, TerminalTokens, OperatorIds, TypeIds { protected static final int THIS_CALL = ExplicitConstructorCall.This; protected static final int SUPER_CALL = ExplicitConstructorCall.Super; @@ -998,6 +998,7 @@ private boolean shouldDeferRecovery = false; // https://bugs.eclipse.org/bugs/sh private int valueLambdaNestDepth = -1; private int stateStackLengthStack[] = new int[0]; private boolean parsingJava8Plus; +private int unstackedAct = ERROR_ACTION; public Parser(ProblemReporter problemReporter, boolean optimizeStringLiterals) { @@ -10901,6 +10902,67 @@ protected void optimizedConcatNodeLists() { this.astLengthStack[--this.astLengthPtr]++; } +public boolean atConflictScenario(int token) { + + /* Answer true if the parser is at a configuration where the scanner must look ahead and help disambiguate between (a) '<' as an operator and '<' as the + start of <type argument> and (b) the use of '(' in '(' expression ')' and '( type ')' and '(' lambda formal parameters ')'. When requested thus, + the scanner helps by fabricating synthetic tokens and injecting them into the stream ahead of the tokens that trigger conflicts in the absence + of these artificial tokens. These manufactured token help transform the grammar into LALR(1) by splitting the states so that they have unambigious + prefixes. + + We do this by claiming to the automaton that the next token seen is the (suitable) synthetic token and observing the response of the state machine. + Error signals we are NOT at a conflict site, while shift or shift/reduce signals that we are. Accept is impossible, while there may be intermediate + reductions that are called for -- It is axiomatic of the push down automaton that corresponds to the LALR grammar that it will never shift on invalid + input. + + Obviously, the dry runs should not alter the parser state in any way or otherwise cause side effects. Proof by argument that this is the case: + + - The only pieces of state needed to answer the question are: this.stack, this.stateStackTop and the last action variable `act`. None of the various + and sundry stacks used in the AST constructions process are touched here. + - As we reduce, we DON'T call the semantic action functions i.e the consume* method calls are skipped. + - Lexer stream is left untouched. + - this.stateStackTop and the last action variable `act` of the automaton are readily cloned, these being primitives and changes are to the replicas. + - We never remove elements from the state stack here (or elsewhere for that matter). Pops are implemented by mere adjustments of the stack pointer. + - During this algorithm, either the stack pointer monotonically decreases or stays fixed. (The only way for the stack pointer to increase would call + for a shift or a shift/reduce at which point the algorithm is ready to terminate already.) This means that we don't have to replicate the stack. + Pushes can be mimiced by writing to a local stackTopState variable, leaving the original stack untouched. + + Though this code looks complex, we should exit early in most situations. + */ + int lastAction = this.unstackedAct; + if (lastAction == ERROR_ACTION) { // automaton is not running. + return false; + } + int stackTop = this.stateStackTop; // local copy of stack pointer + int stackTopState = this.stack[stackTop]; // single cell non write through "alternate stack" - the automaton's stack pointer either stays fixed during this manoeuvre or monotonically decreases. + int highWaterMark = stackTop; + + token = token == TokenNameLPAREN ? TokenNameBeginLambda : TokenNameBeginTypeArguments; + + // A rotated version of the automaton - cf. parse()'s for(;;) + for (;;) { + if (lastAction > ERROR_ACTION) { /* shift-reduce on loop entry from above, reduce on loop back */ + lastAction -= ERROR_ACTION; + do { /* reduce */ + stackTop -= rhs[lastAction] - 1; + if (stackTop < highWaterMark) { + stackTopState = this.stack[highWaterMark = stackTop]; + } // else stackTopState is upto date already. + lastAction = ntAction(stackTopState, lhs[lastAction]); + } while (lastAction <= NUM_RULES); + } + highWaterMark = ++stackTop; + stackTopState = lastAction; // "push" + lastAction = tAction(lastAction, token); // can be looked up from a precomputed cache. + if (lastAction <= NUM_RULES) { + stackTop --; + lastAction += ERROR_ACTION; + continue; + } + // Error => false, Shift, Shift/Reduce => true, Accept => impossible. + return lastAction != ERROR_ACTION; + } +} /*main loop of the automat When a rule is reduced, the method consumeRule(int) is called with the number of the consumed rule. When a terminal is consumed, the method consumeToken(int) is @@ -10922,6 +10984,9 @@ protected void parse() { int act = START_STATE; this.stateStackTop = -1; this.currentToken = getFirstToken(); + +try { + this.scanner.setActiveParser(this); ProcessTerminals : for (;;) { int stackLength = this.stack.length; if (++this.stateStackTop >= stackLength) { @@ -10974,6 +11039,7 @@ protected void parse() { this.recordStringLiterals = oldValue; } try { + this.unstackedAct = act; this.currentToken = this.scanner.getNextToken(); } catch(InvalidInputException e){ if (!this.hasReportedError){ @@ -10983,6 +11049,8 @@ protected void parse() { this.lastCheckPoint = this.scanner.currentPosition; this.currentToken = 0; this.restartRecovery = true; + } finally { + this.unstackedAct = ERROR_ACTION; } if(this.statementRecoveryActivated) { jumpOverType(); @@ -11003,6 +11071,7 @@ protected void parse() { this.recordStringLiterals = oldValue; } try{ + this.unstackedAct = act; this.currentToken = this.scanner.getNextToken(); } catch(InvalidInputException e){ if (!this.hasReportedError){ @@ -11012,6 +11081,8 @@ protected void parse() { this.lastCheckPoint = this.scanner.currentPosition; this.currentToken = 0; this.restartRecovery = true; + } finally { + this.unstackedAct = ERROR_ACTION; } if(this.statementRecoveryActivated) { jumpOverType(); @@ -11047,6 +11118,9 @@ protected void parse() { System.out.println("----------------------------------------"); //$NON-NLS-1$ } } +} finally { + this.scanner.setActiveParser(null); +} if (DEBUG_AUTOMATON) { System.out.println("- End ----------------------------------"); //$NON-NLS-1$ diff --git a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/Scanner.java b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/Scanner.java index c902a03872..4008acc62f 100644 --- a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/Scanner.java +++ b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/Scanner.java @@ -193,42 +193,7 @@ public class Scanner implements TerminalTokens { private final boolean scanningJava8Plus; private VanguardScanner vanguardScanner; private VanguardParser vanguardParser; - - private int lookAheadState = START_STATE; - private static int[][] lookAheadTable; - - private static final int START_STATE = 0; - private static final int POST_LAMBDA_PREFIX = 1; - private static final int POST_IDENTIFIER = 2; - private static final int POST_BEGIN_TYPE_ARGUMENTS = 3; - private static final int TOTAL_LOOKAHEAD_STATES = 4; - - static { - int maxTerminals = ParserBasicInformation.NUM_TERMINALS; - lookAheadTable = new int[TOTAL_LOOKAHEAD_STATES][maxTerminals]; - - for (int i = 0; i <= POST_IDENTIFIER; i++) { - for (int j = 0; j < maxTerminals; j++) { - lookAheadTable[i][j] = START_STATE; - } - lookAheadTable[i][TokenNameEQUAL] = POST_LAMBDA_PREFIX; - lookAheadTable[i][TokenNamereturn] = POST_LAMBDA_PREFIX; - lookAheadTable[i][TokenNameLPAREN] = POST_LAMBDA_PREFIX; - lookAheadTable[i][TokenNameRPAREN] = POST_LAMBDA_PREFIX; - lookAheadTable[i][TokenNameCOMMA] = POST_LAMBDA_PREFIX; - lookAheadTable[i][TokenNameARROW] = POST_LAMBDA_PREFIX; - lookAheadTable[i][TokenNameQUESTION] = POST_LAMBDA_PREFIX; - lookAheadTable[i][TokenNameCOLON] = POST_LAMBDA_PREFIX; - lookAheadTable[i][TokenNameIdentifier] = POST_IDENTIFIER; - } - - lookAheadTable[POST_IDENTIFIER][TokenNameBeginTypeArguments] = POST_BEGIN_TYPE_ARGUMENTS; - - for (int i = 0; i < maxTerminals; i++) { - lookAheadTable[POST_BEGIN_TYPE_ARGUMENTS][i] = POST_BEGIN_TYPE_ARGUMENTS; // be stuck here until "::", so we don't inject the - } - lookAheadTable[POST_BEGIN_TYPE_ARGUMENTS][TokenNameCOLON_COLON] = START_STATE; // synthetic token ahead of the second '<' in X<T>.Y<Q>:: - } + private ConflictedParser activeParser = null; public static final int RoundBracket = 0; public static final int SquareBracket = 1; @@ -261,7 +226,6 @@ public Scanner( this.sourceLevel = sourceLevel; this.nextToken = TokenNameNotAToken; this.scanningJava8Plus = sourceLevel >= ClassFileConstants.JDK1_8; - this.lookAheadState = START_STATE; this.complianceLevel = complianceLevel; this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals; if (taskTags != null) { @@ -1165,36 +1129,38 @@ public int scanIdentifier() throws InvalidInputException { return TokenNameERROR; } } -public void ungetToken(int token) { +public void ungetToken(int unambiguousToken) { if (this.nextToken != TokenNameNotAToken) { throw new ArrayIndexOutOfBoundsException("Single cell array overflow"); //$NON-NLS-1$ } - this.nextToken = token; + this.nextToken = unambiguousToken; } public int getNextToken() throws InvalidInputException { - int token = this.nextToken != TokenNameNotAToken ? this.nextToken : getNextToken0(); - this.nextToken = TokenNameNotAToken; + int token; + if (this.nextToken != TokenNameNotAToken) { + token = this.nextToken; + this.nextToken = TokenNameNotAToken; + return token; // presumed to be unambiguous. + } - if (!this.scanningJava8Plus) { - return token; + token = getNextToken0(); + if (!this.scanningJava8Plus || this.activeParser == null) { + return token; // no audience, no magic. } - if (token == TokenNameLPAREN && this.lookAheadState == POST_LAMBDA_PREFIX) { + if (token == TokenNameLPAREN && this.activeParser.atConflictScenario(token)) { if (atLambdaParameterList()) { this.nextToken = token; token = TokenNameBeginLambda; } - } else if (token == TokenNameLESS && this.lookAheadState == POST_IDENTIFIER) { + } else if (token == TokenNameLESS && this.activeParser.atConflictScenario(token)) { if (atReferenceExpression()) { this.nextToken = token; token = TokenNameBeginTypeArguments; } } - if (token < ParserBasicInformation.NUM_TERMINALS) { - this.lookAheadState = lookAheadTable[this.lookAheadState][token]; - } return token; } protected int getNextToken0() throws InvalidInputException { @@ -2774,7 +2740,6 @@ public void resetTo(int begin, int end) { this.commentPtr = -1; // reset comment stack this.foundTaskCount = 0; this.nextToken = TokenNameNotAToken; - this.lookAheadState = START_STATE; } protected final void scanEscapeCharacter() throws InvalidInputException { @@ -4326,4 +4291,8 @@ private final boolean atLambdaParameterList() { // Did the '(' we saw just now h private final boolean atReferenceExpression() { // Did the '<' we saw just now herald a reference expression ? return getVanguardParser().parse(TokenNameCOLON_COLON); } + +public void setActiveParser(ConflictedParser parser) { + this.activeParser = parser; +} } diff --git a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/diagnose/DiagnoseParser.java b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/diagnose/DiagnoseParser.java index d6e4c2af94..8b1d9e686c 100644 --- a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/diagnose/DiagnoseParser.java +++ b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/diagnose/DiagnoseParser.java @@ -16,6 +16,7 @@ package org.eclipse.jdt.internal.compiler.parser.diagnose; import org.eclipse.jdt.core.compiler.CharOperation; import org.eclipse.jdt.internal.compiler.impl.CompilerOptions; +import org.eclipse.jdt.internal.compiler.parser.ConflictedParser; import org.eclipse.jdt.internal.compiler.parser.Parser; import org.eclipse.jdt.internal.compiler.parser.ParserBasicInformation; import org.eclipse.jdt.internal.compiler.parser.RecoveryScanner; @@ -25,7 +26,7 @@ import org.eclipse.jdt.internal.compiler.parser.TerminalTokens; import org.eclipse.jdt.internal.compiler.problem.ProblemReporter; import org.eclipse.jdt.internal.compiler.util.Util; -public class DiagnoseParser implements ParserBasicInformation, TerminalTokens { +public class DiagnoseParser implements ParserBasicInformation, TerminalTokens, ConflictedParser { private static final boolean DEBUG = false; private boolean DEBUG_PARSECHECK = false; @@ -198,6 +199,7 @@ public class DiagnoseParser implements ParserBasicInformation, TerminalTokens { oldRecord = this.recoveryScanner.record; this.recoveryScanner.record = record; } + this.parser.scanner.setActiveParser(this); try { this.lexStream.reset(); @@ -426,6 +428,7 @@ public class DiagnoseParser implements ParserBasicInformation, TerminalTokens { if(this.recoveryScanner != null) { this.recoveryScanner.record = oldRecord; } + this.parser.scanner.setActiveParser(null); } return; } @@ -2595,4 +2598,14 @@ public class DiagnoseParser implements ParserBasicInformation, TerminalTokens { return res.toString(); } + + public boolean atConflictScenario(int token) { + /* There is too much voodoo that goes on here in DiagnoseParser (multiple machines, lexer stream reset etc.) + So we take a simple minded view that we will always ask for disambiguation, except there is one scenario + that needs special handling, we let the lexer stream deal with that: In X<String>.Y<Integer>:: the second + '<' should not be tagged for disambiguation. If a synthetic token gets injected there, there will be syntax + error. See that this is not a problem for the regular/normal parser. + */ + return this.lexStream.atConflictScenario(token); + } } diff --git a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/diagnose/LexStream.java b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/diagnose/LexStream.java index 64858ce3fb..d935bff835 100644 --- a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/diagnose/LexStream.java +++ b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/diagnose/LexStream.java @@ -1,10 +1,14 @@ /******************************************************************************* - * Copyright (c) 2000, 2009 IBM Corporation and others. + * Copyright (c) 2000, 2012 IBM Corporation and others. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at * http://www.eclipse.org/legal/epl-v10.html * + * This is an implementation of an early-draft specification developed under the Java + * Community Process (JCP) and is made available for testing and evaluation purposes + * only. The code is not compatible with any specification of the JCP. + * * Contributors: * IBM Corporation - initial API and implementation *******************************************************************************/ @@ -50,6 +54,7 @@ public class LexStream implements TerminalTokens { private int previousInterval = -1; private int currentInterval = -1; + private boolean awaitingColonColon; public LexStream(int size, Scanner scanner, int[] intervalStartToSkip, int[] intervalEndToSkip, int[] intervalFlagsToSkip, int firstToken, int init, int eof) { this.tokenCache = new Token[size]; @@ -65,7 +70,7 @@ public class LexStream implements TerminalTokens { this.intervalStartToSkip = intervalStartToSkip; this.intervalEndToSkip = intervalEndToSkip; this.intervalFlagsToSkip = intervalFlagsToSkip; - + this.awaitingColonColon = false; scanner.resetTo(init, eof); this.scanner = scanner; } @@ -77,6 +82,11 @@ public class LexStream implements TerminalTokens { while(tokenNotFound) { try { int tokenKind = this.scanner.getNextToken(); + if (tokenKind == TokenNameBeginTypeArguments) { + this.awaitingColonColon = true; + } else if (tokenKind == TokenNameCOLON_COLON) { + this.awaitingColonColon = false; + } if(tokenKind != TokenNameEOF) { int start = this.scanner.getCurrentTokenStartPosition(); int end = this.scanner.getCurrentTokenEndPosition(); @@ -288,4 +298,8 @@ public class LexStream implements TerminalTokens { return res.toString(); } + + public boolean atConflictScenario(int token) { + return (token == TokenNameLPAREN || (token == TokenNameLESS && !this.awaitingColonColon)); + } } |