Skip to main content
summaryrefslogtreecommitdiffstats
blob: a54502edc0fb29b26c99b0db0f4e6e95eeede2ae (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
/*******************************************************************************
 * Copyright (c) 2000, 2008 IBM Corporation and others.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     IBM Corporation - initial API and implementation
 *******************************************************************************/

package org.eclipse.wst.jsdt.core.compiler;


 /**
  * Definition of a JavaScript scanner, as returned by the <code>ToolFactory</code>.
  * The scanner is responsible for tokenizing a given source, providing information about
  * the nature of each token read, its positions and its source equivalent.
  * <p>
  * When the scanner has finished tokenizing, it answers an EOF token
  * (<code>ITerminalSymbols.TokenNameEOF</code>).
  * </p><p>
  * When encountering lexical errors, an <code>InvalidInputException</code> is thrown.
  * </p><p>
  * This interface is not intended to be implemented by clients.
  * </p><p>
  * Provisional API: This class/interface is part of an interim API that is still under development and expected to
  * change significantly before reaching stability. It is being made available at this early stage to solicit feedback
  * from pioneering adopters on the understanding that any code that uses this API will almost certainly be broken
  * (repeatedly) as the API evolves.
  * </p>
  *
  * @see org.eclipse.wst.jsdt.core.ToolFactory
  * @see ITerminalSymbols
  */
public interface IScanner {

	/**
	 * Answers the current identifier source, after unicode escape sequences have
	 * been translated into unicode characters.
	 * For example, if the original source was <code>\\u0061bc</code> then it will answer <code>abc</code>.
	 *
	 * @return the current identifier source, after unicode escape sequences have
	 * been translated into unicode characters
	 * @see #getRawTokenSource()
	 */
	char[] getCurrentTokenSource();

	/**
	 * Answers the current identifier source, before unicode escape sequences have
	 * been translated into unicode characters.
	 * For example, if the original source was <code>\\u0061bc</code> then it will answer <code>\\u0061bc</code>.
	 *
	 * @return the current identifier source, before unicode escape sequences have
	 * been translated into unicode characters
	 * @see #getCurrentTokenSource()
	 * @since 2.1
	 */
	char[] getRawTokenSource();

	/**
	 * Answers the starting position of the current token inside the original source.
	 * This position is zero-based and inclusive. It corresponds to the position of the first character
	 * which is part of this token. If this character was a unicode escape sequence, it points at the first
	 * character of this sequence.
	 *
	 * @return the starting position of the current token inside the original source
	 * @see #getCurrentTokenEndPosition()
	 */
	int getCurrentTokenStartPosition();

	/**
	 * Answers the ending position of the current token inside the original source.
	 * This position is zero-based and inclusive. It corresponds to the position of the last character
	 * which is part of this token. If this character was a unicode escape sequence, it points at the last
	 * character of this sequence.
	 *
	 * @return the ending position of the current token inside the original source
	 * @see #getCurrentTokenStartPosition()
	 */
	int getCurrentTokenEndPosition();

	/**
	 * Answers the starting position of a given line number. This line has to have been encountered
	 * already in the tokenization process (in other words, it cannot be used to compute positions of lines
	 * beyond the current token). Once the entire source has been processed, it can be used without any limit.
	 * Line starting positions are zero-based, and start immediately after the previous line separator (if any).
	 *
	 * @param lineNumber the given line number
	 * @return the starting position of the given line number
	 * @see #getLineEnd(int)
	 */
	int getLineStart(int lineNumber);

	/**
	 * Answers the ending position of a given line number. This line has to have been encountered
	 * already in the tokenization process (in other words, it cannot be used to compute positions of lines
	 * beyond the current token). Once the entire source has been processed, it can be used without any limit.
	 * Line ending positions are zero-based, and correspond to the last character of the line separator
	 * (in the case of multi-character line separators).
	 *
	 * @param lineNumber the given line number
	 * @return the ending position of the given line number
	 * @see #getLineStart(int)
	 */
	int getLineEnd(int lineNumber);

	/**
	 * Answers an array of the ending positions of the lines encountered so far. Line ending positions
	 * are zero-based, and correspond to the last character of the line separator (in the case of
	 * multi-character line separators).
	 *
	 * @return an array of the ending positions of the lines encountered so far
	 * @see #getLineEnd(int)
	 */
	int[] getLineEnds();

	/**
	 * Answers a 1-based line number using the lines which have been encountered so far. If the position
	 * is located beyond the current scanned line, then the last line number will be answered.
	 *
	 * @param charPosition the given character position
	 * @return a 1-based line number using the lines which have been encountered so far
	 */
	int getLineNumber(int charPosition);

	/**
	 * Reads the next token in the source, and answers its ID as specified by <code>ITerminalSymbols</code>.
	 * Note that the actual token ID values are subject to change if new keywords were added to the language
	 * (for instance, 'assert' is a keyword in 1.4).
	 * NOTE(review): the 'assert'/1.4 example looks Java-specific rather than JavaScript-specific;
	 * confirm whether it still applies to this scanner.
	 *
	 * @return the next token
	 * @throws InvalidInputException in case a lexical error was detected while reading the current token
	 */
	int getNextToken() throws InvalidInputException;

	/**
	 * Answers the original source being processed (not a copy of it).
	 *
	 * @return the original source being processed
	 * @see #setSource(char[])
	 */
	char[] getSource();

	/**
	 * Repositions the scanner on some portion of the original source. The given endPosition is the last
	 * valid position. Beyond this position, the scanner will answer EOF tokens
	 * (<code>ITerminalSymbols.TokenNameEOF</code>).
	 *
	 * @param startPosition the given start position
	 * @param endPosition the given end position, which is the last valid position
	 */
	void resetTo(int startPosition, int endPosition);

	/**
	 * Sets the scanner source to process. By default, the scanner will consider starting at the beginning of the
	 * source until it reaches its end.
	 * If the given source is <code>null</code>, this clears the source.
	 *
	 * @param source the given source, or <code>null</code> to clear the source
	 * @see #getSource()
	 */
	void setSource(char[] source);
}

Back to the top