Rework of lookahead strategy

author: Srikanth 2012-07-14 07:43:31 +0000
committer: Srikanth 2012-07-14 07:43:31 +0000
commit: e0956e75ff6cb3066016adc9dae90f9c1534dda4 (patch)
tree: 8b95f618d5e1dae1482a7f9a4f74d84c1581a73a
parent: 4c2900096a422605a0884d5ca39839064ce0c89a (diff)
download: eclipse.jdt.core-e0956e75ff6cb3066016adc9dae90f9c1534dda4.tar.gz
eclipse.jdt.core-e0956e75ff6cb3066016adc9dae90f9c1534dda4.tar.xz
eclipse.jdt.core-e0956e75ff6cb3066016adc9dae90f9c1534dda4.zip
5 files changed, 146 insertions, 53 deletions
diff --git a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/ConflictedParser.java b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/ConflictedParser.java
new file mode 100644
index 0000000000..a795f04a0d
--- /dev/null
+++ b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/ConflictedParser.java
@@ -0,0 +1,23 @@
+/*******************************************************************************
+ * Copyright (c) 2000, 2012 IBM Corporation and others.
+ * All rights reserved. This program and the accompanying materials
+ * are made available under the terms of the Eclipse Public License v1.0
+ * which accompanies this distribution, and is available at
+ * http://www.eclipse.org/legal/epl-v10.html
+ * 
+ * This is an implementation of an early-draft specification developed under the Java
+ * Community Process (JCP) and is made available for testing and evaluation purposes
+ * only. The code is not compatible with any specification of the JCP.
+ * 
+ * Contributors:
+ *     IBM Corporation - initial API and implementation
+ *******************************************************************************/
+package org.eclipse.jdt.internal.compiler.parser;
+
+public interface ConflictedParser {
+	
+	/* Return true if at the configuration the parser finds itself in, token would need to be disambiguated.
+	   At Java SE 8 time, we have two tokens that need to clarified: the use of '( and that of '<'
+	*/
+	boolean atConflictScenario(int token);
+}
diff --git a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/Parser.java b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/Parser.java
index 9075565d1b..b437cbecab 100644
--- a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/Parser.java
+++ b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/Parser.java
@@ -145,7 +145,7 @@ import org.eclipse.jdt.internal.compiler.problem.ProblemSeverities;
 import org.eclipse.jdt.internal.compiler.util.Messages;
 import org.eclipse.jdt.internal.compiler.util.Util;
 
-public class Parser implements  ParserBasicInformation, TerminalTokens, OperatorIds, TypeIds {
+public class Parser implements ConflictedParser, ParserBasicInformation, TerminalTokens, OperatorIds, TypeIds {
 	
 	protected static final int THIS_CALL = ExplicitConstructorCall.This;
 	protected static final int SUPER_CALL = ExplicitConstructorCall.Super;
@@ -998,6 +998,7 @@ private boolean shouldDeferRecovery = false; // https://bugs.eclipse.org/bugs/sh
 private int valueLambdaNestDepth = -1;
 private int stateStackLengthStack[] = new int[0];
 private boolean parsingJava8Plus;
+private int unstackedAct = ERROR_ACTION;
 
 public Parser(ProblemReporter problemReporter, boolean optimizeStringLiterals) {
 
@@ -10901,6 +10902,67 @@ protected void optimizedConcatNodeLists() {
 
 	this.astLengthStack[--this.astLengthPtr]++;
 }
+public boolean atConflictScenario(int token) {
+	
+	/* Answer true if the parser is at a configuration where the scanner must look ahead and help disambiguate between (a) '<' as an operator and '<' as the
+	   start of <type argument> and (b) the use of '(' in '(' expression ')' and '( type ')' and '(' lambda formal parameters ')'. When requested thus,
+	   the scanner helps by fabricating synthetic tokens and injecting them into the stream ahead of the tokens that trigger conflicts in the absence
+	   of these artificial tokens. These manufactured token help transform the grammar into LALR(1) by splitting the states so that they have unambigious
+	   prefixes.
+	   
+	   We do this by claiming to the automaton that the next token seen is the (suitable) synthetic token and observing the response of the state machine. 
+	   Error signals we are NOT at a conflict site, while shift or shift/reduce signals that we are. Accept is impossible, while there may be intermediate
+	   reductions that are called for -- It is axiomatic of the push down automaton that corresponds to the LALR grammar that it will never shift on invalid
+	   input.
+	   
+	   Obviously, the dry runs should not alter the parser state in any way or otherwise cause side effects. Proof by argument that this is the case:
+	   
+	       - The only pieces of state needed to answer the question are: this.stack, this.stateStackTop and the last action variable `act`. None of the various
+	         and sundry stacks used in the AST constructions process are touched here.
+	       - As we reduce, we DON'T call the semantic action functions i.e the consume* method calls are skipped.
+	       - Lexer stream is left untouched.
+	       - this.stateStackTop and the last action variable `act` of the automaton are readily cloned, these being primitives and changes are to the replicas.
+	       - We never remove elements from the state stack here (or elsewhere for that matter). Pops are implemented by mere adjustments of the stack pointer.
+	       - During this algorithm, either the stack pointer monotonically decreases or stays fixed. (The only way for the stack pointer to increase would call
+	         for a shift or a shift/reduce at which point the algorithm is ready to terminate already.) This means that we don't have to replicate the stack. 
+	         Pushes can be mimiced by writing to a local stackTopState variable, leaving the original stack untouched.
+	         
+	    Though this code looks complex, we should exit early in most situations.     
+	 */
+	int lastAction = this.unstackedAct;
+	if (lastAction == ERROR_ACTION) { // automaton is not running.
+		return false;
+	}
+	int stackTop = this.stateStackTop;        // local copy of stack pointer
+	int stackTopState = this.stack[stackTop]; // single cell non write through "alternate stack" - the automaton's stack pointer either stays fixed during this manoeuvre or monotonically decreases.
+	int highWaterMark = stackTop;
+	
+	token = token == TokenNameLPAREN ? TokenNameBeginLambda : TokenNameBeginTypeArguments;
+	
+	// A rotated version of the automaton - cf. parse()'s for(;;)
+	for (;;) {  
+		if (lastAction > ERROR_ACTION) {  /* shift-reduce on loop entry from above, reduce on loop back */
+			lastAction -= ERROR_ACTION;
+			do { /* reduce */
+				stackTop -= rhs[lastAction] - 1;
+				if (stackTop < highWaterMark) {
+					stackTopState = this.stack[highWaterMark = stackTop];
+				} // else stackTopState is upto date already.
+				lastAction = ntAction(stackTopState, lhs[lastAction]);
+			} while (lastAction <= NUM_RULES);
+		}
+		highWaterMark = ++stackTop;
+		stackTopState = lastAction; // "push"
+		lastAction = tAction(lastAction, token); // can be looked up from a precomputed cache.
+		if (lastAction <= NUM_RULES) {
+			stackTop --; 
+		    lastAction += ERROR_ACTION;
+			continue;
+		}
+		// Error => false, Shift, Shift/Reduce => true, Accept => impossible. 
+		return lastAction != ERROR_ACTION;
+	}
+}
 /*main loop of the automat
 When a rule is reduced, the method consumeRule(int) is called with the number
 of the consumed rule. When a terminal is consumed, the method consumeToken(int) is
@@ -10922,6 +10984,9 @@ protected void parse() {
 	int act = START_STATE;
 	this.stateStackTop = -1;
 	this.currentToken = getFirstToken();
+	
+try {
+	this.scanner.setActiveParser(this);
 	ProcessTerminals : for (;;) {
 		int stackLength = this.stack.length;
 		if (++this.stateStackTop >= stackLength) {
@@ -10974,6 +11039,7 @@ protected void parse() {
 				this.recordStringLiterals = oldValue;
 			}
 			try {
+				this.unstackedAct = act;
 				this.currentToken = this.scanner.getNextToken();
 			} catch(InvalidInputException e){
 				if (!this.hasReportedError){
@@ -10983,6 +11049,8 @@ protected void parse() {
 				this.lastCheckPoint = this.scanner.currentPosition;
 				this.currentToken = 0;
 				this.restartRecovery = true;
+			} finally {
+				this.unstackedAct = ERROR_ACTION;
 			}
 			if(this.statementRecoveryActivated) {
 				jumpOverType();
@@ -11003,6 +11071,7 @@ protected void parse() {
 					this.recordStringLiterals = oldValue;
 				}
 				try{
+					this.unstackedAct = act;
 					this.currentToken = this.scanner.getNextToken();
 				} catch(InvalidInputException e){
 					if (!this.hasReportedError){
@@ -11012,6 +11081,8 @@ protected void parse() {
 					this.lastCheckPoint = this.scanner.currentPosition;
 					this.currentToken = 0;
 					this.restartRecovery = true;
+				} finally {
+					this.unstackedAct = ERROR_ACTION;
 				}
 				if(this.statementRecoveryActivated) {
 					jumpOverType();
@@ -11047,6 +11118,9 @@ protected void parse() {
 			System.out.println("----------------------------------------");  //$NON-NLS-1$
 		}
 	}
+} finally {
+	this.scanner.setActiveParser(null);
+}
 
 	if (DEBUG_AUTOMATON) {
 		System.out.println("- End ----------------------------------");  //$NON-NLS-1$
diff --git a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/Scanner.java b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/Scanner.java
index c902a03872..4008acc62f 100644
--- a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/Scanner.java
+++ b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/Scanner.java
@@ -193,42 +193,7 @@ public class Scanner implements TerminalTokens {
 	private final boolean scanningJava8Plus;
 	private VanguardScanner vanguardScanner;
 	private VanguardParser vanguardParser;
-
-	private int lookAheadState = START_STATE;
-	private static int[][] lookAheadTable;
-	
-	private static final int START_STATE = 0;
-	private static final int POST_LAMBDA_PREFIX = 1;
-	private static final int POST_IDENTIFIER = 2;
-	private static final int POST_BEGIN_TYPE_ARGUMENTS = 3;
-	private static final int TOTAL_LOOKAHEAD_STATES = 4;
-
-	static {
-		int maxTerminals = ParserBasicInformation.NUM_TERMINALS;
-		lookAheadTable = new int[TOTAL_LOOKAHEAD_STATES][maxTerminals];
-		
-		for (int i = 0; i <= POST_IDENTIFIER; i++) {
-			for (int j = 0; j < maxTerminals; j++) {
-				lookAheadTable[i][j] = START_STATE;
-			}	
-			lookAheadTable[i][TokenNameEQUAL] = POST_LAMBDA_PREFIX;
-			lookAheadTable[i][TokenNamereturn] = POST_LAMBDA_PREFIX;
-			lookAheadTable[i][TokenNameLPAREN] = POST_LAMBDA_PREFIX;
-			lookAheadTable[i][TokenNameRPAREN] = POST_LAMBDA_PREFIX;
-			lookAheadTable[i][TokenNameCOMMA] = POST_LAMBDA_PREFIX;
-			lookAheadTable[i][TokenNameARROW] = POST_LAMBDA_PREFIX;
-			lookAheadTable[i][TokenNameQUESTION] = POST_LAMBDA_PREFIX;
-			lookAheadTable[i][TokenNameCOLON] = POST_LAMBDA_PREFIX;
-			lookAheadTable[i][TokenNameIdentifier] = POST_IDENTIFIER;
-		}
-		
-		lookAheadTable[POST_IDENTIFIER][TokenNameBeginTypeArguments] = POST_BEGIN_TYPE_ARGUMENTS;
-
-		for (int i = 0; i < maxTerminals; i++) {
-			lookAheadTable[POST_BEGIN_TYPE_ARGUMENTS][i] = POST_BEGIN_TYPE_ARGUMENTS;  // be stuck here until "::", so we don't inject the
-		}
-		lookAheadTable[POST_BEGIN_TYPE_ARGUMENTS][TokenNameCOLON_COLON] = START_STATE; // synthetic token ahead of the second '<' in X<T>.Y<Q>::
-	}
+	private ConflictedParser activeParser = null;
 	
 	public static final int RoundBracket = 0;
 	public static final int SquareBracket = 1;
@@ -261,7 +226,6 @@ public Scanner(
 	this.sourceLevel = sourceLevel;
 	this.nextToken = TokenNameNotAToken;
 	this.scanningJava8Plus = sourceLevel >= ClassFileConstants.JDK1_8;
-	this.lookAheadState = START_STATE;
 	this.complianceLevel = complianceLevel;
 	this.checkNonExternalizedStringLiterals = checkNonExternalizedStringLiterals;
 	if (taskTags != null) {
@@ -1165,36 +1129,38 @@ public int scanIdentifier() throws InvalidInputException {
 		return TokenNameERROR;
 	}
 }
-public void ungetToken(int token) {
+public void ungetToken(int unambiguousToken) {
 	if (this.nextToken != TokenNameNotAToken) {
 		throw new ArrayIndexOutOfBoundsException("Single cell array overflow"); //$NON-NLS-1$
 	}
-	this.nextToken = token;
+	this.nextToken = unambiguousToken;
 }
 
 public int getNextToken() throws InvalidInputException {
 	
-	int token = this.nextToken != TokenNameNotAToken ? this.nextToken : getNextToken0();
-	this.nextToken = TokenNameNotAToken;
+	int token;
+	if (this.nextToken != TokenNameNotAToken) {
+		token = this.nextToken;
+		this.nextToken = TokenNameNotAToken;
+		return token; // presumed to be unambiguous.
+	}
 	
-	if (!this.scanningJava8Plus) {
-		return token;
+	token = getNextToken0();
+	if (!this.scanningJava8Plus || this.activeParser == null) {
+		return token;  // no audience, no magic.
 	}
 
-	if (token == TokenNameLPAREN && this.lookAheadState == POST_LAMBDA_PREFIX) {
+	if (token == TokenNameLPAREN && this.activeParser.atConflictScenario(token)) {
 		if (atLambdaParameterList()) {
 			this.nextToken = token;
 			token = TokenNameBeginLambda;
 		}
-	} else if (token == TokenNameLESS && this.lookAheadState == POST_IDENTIFIER) {
+	} else if (token == TokenNameLESS && this.activeParser.atConflictScenario(token)) {
 		if (atReferenceExpression()) {
 			this.nextToken = token;
 			token = TokenNameBeginTypeArguments;
 		}
 	}
-	if (token < ParserBasicInformation.NUM_TERMINALS) {
-		this.lookAheadState = lookAheadTable[this.lookAheadState][token];
-	}
 	return token;
 }
 protected int getNextToken0() throws InvalidInputException {
@@ -2774,7 +2740,6 @@ public void resetTo(int begin, int end) {
 	this.commentPtr = -1; // reset comment stack
 	this.foundTaskCount = 0;
 	this.nextToken = TokenNameNotAToken;
-	this.lookAheadState = START_STATE;
 }
 
 protected final void scanEscapeCharacter() throws InvalidInputException {
@@ -4326,4 +4291,8 @@ private final boolean atLambdaParameterList() { // Did the '(' we saw just now h
 private final boolean atReferenceExpression() { // Did the '<' we saw just now herald a reference expression ?
 	return getVanguardParser().parse(TokenNameCOLON_COLON);
 }
+
+public void setActiveParser(ConflictedParser parser) {
+	this.activeParser  = parser;
+}
 }
diff --git a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/diagnose/DiagnoseParser.java b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/diagnose/DiagnoseParser.java
index d6e4c2af94..8b1d9e686c 100644
--- a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/diagnose/DiagnoseParser.java
+++ b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/diagnose/DiagnoseParser.java
@@ -16,6 +16,7 @@ package org.eclipse.jdt.internal.compiler.parser.diagnose;
 
 import org.eclipse.jdt.core.compiler.CharOperation;
 import org.eclipse.jdt.internal.compiler.impl.CompilerOptions;
+import org.eclipse.jdt.internal.compiler.parser.ConflictedParser;
 import org.eclipse.jdt.internal.compiler.parser.Parser;
 import org.eclipse.jdt.internal.compiler.parser.ParserBasicInformation;
 import org.eclipse.jdt.internal.compiler.parser.RecoveryScanner;
@@ -25,7 +26,7 @@ import org.eclipse.jdt.internal.compiler.parser.TerminalTokens;
 import org.eclipse.jdt.internal.compiler.problem.ProblemReporter;
 import org.eclipse.jdt.internal.compiler.util.Util;
 
-public class DiagnoseParser implements ParserBasicInformation, TerminalTokens {
+public class DiagnoseParser implements ParserBasicInformation, TerminalTokens, ConflictedParser {
 	private static final boolean DEBUG = false;
 	private boolean DEBUG_PARSECHECK = false;
 
@@ -198,6 +199,7 @@ public class DiagnoseParser implements ParserBasicInformation, TerminalTokens {
 			oldRecord = this.recoveryScanner.record;
 			this.recoveryScanner.record = record;
 		}
+		this.parser.scanner.setActiveParser(this);
 		try {
 			this.lexStream.reset();
 
@@ -426,6 +428,7 @@ public class DiagnoseParser implements ParserBasicInformation, TerminalTokens {
 			if(this.recoveryScanner != null) {
 				this.recoveryScanner.record = oldRecord;
 			}
+			this.parser.scanner.setActiveParser(null);
 		}
 		return;
 	}
@@ -2595,4 +2598,14 @@ public class DiagnoseParser implements ParserBasicInformation, TerminalTokens {
 
 		return res.toString();
 	}
+
+	public boolean atConflictScenario(int token) {
+		/* There is too much voodoo that goes on here in DiagnoseParser (multiple machines, lexer stream reset etc.)
+		   So we take a simple minded view that we will always ask for disambiguation, except there is one scenario 
+		   that needs special handling, we let the lexer stream deal with that: In X<String>.Y<Integer>:: the second
+		   '<' should not be tagged for disambiguation. If a synthetic token gets injected there, there will be syntax
+		   error. See that this is not a problem for the regular/normal parser.
+		*/ 
+		return this.lexStream.atConflictScenario(token);
+	}
 }
diff --git a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/diagnose/LexStream.java b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/diagnose/LexStream.java
index 64858ce3fb..d935bff835 100644
--- a/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/diagnose/LexStream.java
+++ b/org.eclipse.jdt.core/compiler/org/eclipse/jdt/internal/compiler/parser/diagnose/LexStream.java
@@ -1,10 +1,14 @@
 /*******************************************************************************
- * Copyright (c) 2000, 2009 IBM Corporation and others.
+ * Copyright (c) 2000, 2012 IBM Corporation and others.
  * All rights reserved. This program and the accompanying materials
  * are made available under the terms of the Eclipse Public License v1.0
  * which accompanies this distribution, and is available at
  * http://www.eclipse.org/legal/epl-v10.html
  *
+ * This is an implementation of an early-draft specification developed under the Java
+ * Community Process (JCP) and is made available for testing and evaluation purposes
+ * only. The code is not compatible with any specification of the JCP.
+ *
  * Contributors:
  *     IBM Corporation - initial API and implementation
  *******************************************************************************/
@@ -50,6 +54,7 @@ public class LexStream implements TerminalTokens {
 
 	private int previousInterval = -1;
 	private int currentInterval = -1;
+	private boolean awaitingColonColon;
 
 	public LexStream(int size, Scanner scanner, int[] intervalStartToSkip, int[] intervalEndToSkip, int[] intervalFlagsToSkip, int firstToken, int init, int eof) {
 		this.tokenCache = new Token[size];
@@ -65,7 +70,7 @@ public class LexStream implements TerminalTokens {
 		this.intervalStartToSkip = intervalStartToSkip;
 		this.intervalEndToSkip = intervalEndToSkip;
 		this.intervalFlagsToSkip = intervalFlagsToSkip;
-
+		this.awaitingColonColon = false;
 		scanner.resetTo(init, eof);
 		this.scanner = scanner;
 	}
@@ -77,6 +82,11 @@ public class LexStream implements TerminalTokens {
 		while(tokenNotFound) {
 			try {
 				int tokenKind =  this.scanner.getNextToken();
+				if (tokenKind == TokenNameBeginTypeArguments) {
+					this.awaitingColonColon = true;
+				} else if (tokenKind == TokenNameCOLON_COLON) {
+					this.awaitingColonColon = false;
+				}
 				if(tokenKind != TokenNameEOF) {
 					int start = this.scanner.getCurrentTokenStartPosition();
 					int end = this.scanner.getCurrentTokenEndPosition();
@@ -288,4 +298,8 @@ public class LexStream implements TerminalTokens {
 
 		return res.toString();
 	}
+
+	public boolean atConflictScenario(int token) {
+		return (token == TokenNameLPAREN || (token == TokenNameLESS && !this.awaitingColonColon));
+	}
 }
author	Srikanth	2012-07-14 07:43:31 +0000
committer	Srikanth	2012-07-14 07:43:31 +0000
commit	e0956e75ff6cb3066016adc9dae90f9c1534dda4 (patch)
tree	8b95f618d5e1dae1482a7f9a4f74d84c1581a73a
parent	4c2900096a422605a0884d5ca39839064ce0c89a (diff)
download	eclipse.jdt.core-e0956e75ff6cb3066016adc9dae90f9c1534dda4.tar.gz eclipse.jdt.core-e0956e75ff6cb3066016adc9dae90f9c1534dda4.tar.xz eclipse.jdt.core-e0956e75ff6cb3066016adc9dae90f9c1534dda4.zip