blob: 57b3e83f9b5840b8c4a2782129d28920d381c05b [file] [log] [blame]
david_williamscfdb2cd2004-11-11 08:37:49 +00001/*******************************************************************************
2 * Copyright (c) 2004 IBM Corporation and others.
3 * All rights reserved. This program and the accompanying materials
4 * are made available under the terms of the Eclipse Public License v1.0
5 * which accompanies this distribution, and is available at
6 * http://www.eclipse.org/legal/epl-v10.html
7 *
8 * Contributors:
9 * IBM Corporation - initial API and implementation
10 *******************************************************************************/
11/*nlsXXX*/
david_williams78e4db02006-06-07 22:58:51 +000012package org.eclipse.wst.css.core.internal.parser;
david_williamscfdb2cd2004-11-11 08:37:49 +000013
14import java.io.CharArrayReader;
15import java.io.IOException;
16import java.util.ArrayList;
17import java.util.List;
18
david_williams78e4db02006-06-07 22:58:51 +000019import org.eclipse.wst.css.core.internal.parser.regions.CSSTextRegionFactory;
20import org.eclipse.wst.css.core.internal.parserz.CSSRegionContexts;
21import org.eclipse.wst.css.core.internal.parserz.CSSTextToken;
22import org.eclipse.wst.sse.core.internal.provisional.text.ITextRegion;
david_williamscfdb2cd2004-11-11 08:37:49 +000023
24%%
25
// Generated class: public CSSTokenizer implementing CSSRegionContexts and ICSSTokenizer.
%public
%class CSSTokenizer
%implements CSSRegionContexts, ICSSTokenizer
// The scanning method is named primGetNextToken() and returns the region type as a String.
%function primGetNextToken
%type String
// Maintain yychar (character offset) and yyline (line count) for the matched text.
%char
%line
// Unicode input, matched case-insensitively.
%unicode
%caseless
// NOTE: the debug directive is deliberately NOT enabled here. It makes the generated
// scanner print a trace line for every matched token and adds a standalone main();
// turn it on only for local troubleshooting.
%pack
37
38%{
39 private final static String UNDEFINED = "undefined";
40 private String fBufferedContext = null;
41 private int fBufferedStart;
42// private int fBufferedTextLength;
43 private int fBufferedLength;
44// private StringBuffer fBufferedText = null;
45 private CSSTextRegionFactory fRegionFactory = CSSTextRegionFactory.getInstance();
46 private int fInitialState = YYINITIAL;
47 public final static int BUFFER_SIZE_NORMAL = 16384;
48 public final static int BUFFER_SIZE_SMALL = 256;
49 private int fInitialBufferSize = BUFFER_SIZE_NORMAL;
50
51 public void setInitialState(int state) {
52 fInitialState = state;
53 }
54
55 public void setInitialBufferSize(int size) {
56 fInitialBufferSize = size;
57 }
58
59 /* user method */
60 public final ITextRegion getNextToken() throws IOException {
61 String context;
62 String nextTokenType;
63 boolean spaceFollows;
64// StringBuffer text;
65 int start;
66 int textLength;
67 int length;
68 if (fBufferedContext != null) {
69 context = fBufferedContext;
70// text = fBufferedText;
71 start = fBufferedStart;
72 textLength = length = fBufferedLength;
73
74 fBufferedContext = null;
75 } else {
76 context = primGetNextToken();
77// text = new StringBuffer(yytext());
78 start = yychar;
79 textLength = length = yylength();
80 }
81
82 if (context != null) {
83 if (context == UNDEFINED) {
84 // undef -> concatenate undef's
85 nextTokenType = primGetNextToken();
86 while (nextTokenType == UNDEFINED) {
87// text.append(yytext());
88 textLength += yylength();
89 length = textLength;
90 nextTokenType = primGetNextToken();
91 }
92 fBufferedContext = nextTokenType;
93// fBufferedText = new StringBuffer(yytext());
94 fBufferedStart = yychar;
95 fBufferedLength = yylength();
96 } else {
97 nextTokenType = null;
98 spaceFollows = false;
99 if (CSSRegionUtil.isDeclarationValueType(context)) { // declaration value can contain VALUE_S
100 nextTokenType = primGetNextToken();
101 spaceFollows = (nextTokenType == CSS_DECLARATION_VALUE_S);
102 } else if (canContainSpace(context)) {
103 nextTokenType = primGetNextToken();
104 spaceFollows = (nextTokenType == CSS_S);
105 }
106 if (nextTokenType != null) { // nextToken is retrieved
107 if (spaceFollows) {
108 // next is space -> append
109// text.append(yytext());
110 length += yylength();
111 } else {
112 // next is NOT space -> push this for next time, return itself
113 fBufferedContext = nextTokenType;
114// fBufferedText = new StringBuffer(yytext());
115 fBufferedStart = yychar;
116 fBufferedLength = yylength();
117 }
118 }
119 }
120 }
121
122 if (context != null) {
123 if (context == UNDEFINED) {
124 context = CSS_UNKNOWN;
125 }
126 return fRegionFactory.createRegion(context, start, textLength, length);
127 } else {
128 return null;
129 }
130 }
131
132 /* user method */
133 /* for standalone use */
134 public final List parseText() throws IOException {
135 List tokens = new ArrayList();
136
137 CSSTextToken token;
138 for (String kind = primGetNextToken(); kind != null; kind = primGetNextToken()) {
139 token = new CSSTextToken();
140 token.kind = kind;
141 token.start = yychar;
142 token.length = yylength();
143 token.image = yytext();
144 tokens.add(token);
145 }
146
147 return tokens;
148 }
149
150 /* user method */
151 private boolean canContainSpace(String type) {
152 if (type == CSS_DELIMITER || type == CSS_RBRACE || type == CSS_DECLARATION_DELIMITER) {
153 return false;
154 } else {
155 return true;
156 }
157 }
158
159 /* user method */
160 public final int getOffset() {
161 return yychar;
162 }
163
164 /* user method */
165 public final boolean isEOF() {
166 return yy_atEOF;
167 }
168
169 /* user method */
170 public void reset(char[] charArray) {
171 reset(new CharArrayReader(charArray), 0);
172 }
173
174 /* user method */
175 public final void reset(java.io.Reader in, int newOffset) {
176 /** the input device */
177 yy_reader = in;
178
179 /** the current state of the DFA */
180 yy_state = 0;
181
182 /** the current lexical state */
183 yy_lexical_state = fInitialState; //YYINITIAL;
184
185 /** this buffer contains the current text to be matched and is
186 the source of the yytext() string */
187 if (yy_buffer.length != fInitialBufferSize) {
188 yy_buffer = new char[fInitialBufferSize];
189 }
190 java.util.Arrays.fill(yy_buffer, (char)0);
191
192 /** the textposition at the last accepting state */
193 yy_markedPos = 0;
194
195 /** the textposition at the last state to be included in yytext */
196 yy_pushbackPos = 0;
197
198 /** the current text position in the buffer */
199 yy_currentPos = 0;
200
201 /** startRead marks the beginning of the yytext() string in the buffer */
202 yy_startRead = 0;
203
204 /** endRead marks the last character in the buffer, that has been read
205 from input */
206 yy_endRead = 0;
207
208 /** number of newlines encountered up to the start of the matched text */
209 yyline = 0;
210
211 /** the number of characters up to the start of the matched text */
212 yychar = 0;
213
214 /**
215 * the number of characters from the last newline up to the start of the
216 * matched text
217 */
david_williams78e4db02006-06-07 22:58:51 +0000218// yycolumn = 0;
david_williamscfdb2cd2004-11-11 08:37:49 +0000219
220 /**
221 * yy_atBOL == true <=> the scanner is currently at the beginning of a line
222 */
223 yy_atBOL = false;
224
225 /** yy_atEOF == true <=> the scanner has returned a value for EOF */
226 yy_atEOF = false;
227
228 /* user variables */
229 // fUndefined.delete(0, fUndefined.length());
230 }
231
232 /* user method */
233 public CSSTokenizer() {
234 super();
235 }
236
237%}
238
// Lexical states, declared with %state. Each one is entered through yybegin(...)
// in the rules section below; YYINITIAL is the predefined start state.

// @charset rule: after "@charset", then after its string
239%state ST_CHARSET_NAME
240%state ST_CHARSET_DELIMITER
// @import rule: after "@import", after the string/URI, after a medium
241%state ST_IMPORT_URI
242%state ST_IMPORT_MEDIUM
243%state ST_IMPORT_DELIMITER
// @media rule: expecting a medium, then "," or "{"
244%state ST_MEDIA_MEDIUM
245%state ST_MEDIA_DELIMITER
// @page rule: optional pseudo page, then "{"
246%state ST_PAGE_PSEUDO_PAGE
247%state ST_PAGE_DELIMITER
// @font-face rule: expecting "{"
248%state ST_FONT_FACE_DELIMITER
// selectors and attribute selectors
249%state ST_SELECTOR
250%state ST_SELECTOR_MODIFIER
251%state ST_SELECTOR_ATTRIBUTE_NAME
252%state ST_SELECTOR_ATTRIBUTE_OPERATOR
253%state ST_SELECTOR_ATTRIBUTE_VALUE
254%state ST_SELECTOR_ATTRIBUTE_END
// declarations: property, ":", value(s)
255%state ST_DECLARATION
256%state ST_DECLARATION_SEPARATOR
257%state ST_DECLARATION_PRE_VALUE
258%state ST_DECLARATION_VALUE
259
// Macro definitions used by the rules below.
// Matching is case-insensitive (%caseless), so e.g. h also accepts A-F.

// h: one hex digit; nonascii: any char from U+0080 up
260h = [0-9a-f]
261nonascii = [\u0080-\uffff]
// unicode: backslash, 1-6 hex digits, optionally one trailing whitespace char
262unicode = \\{h}{1,6}[ \t\r\n\f]?
// escape: a unicode escape, or backslash followed by a printable/non-ASCII char
263escape = {unicode}|\\[ -~\u0080-\uffff]
david_williams78e4db02006-06-07 22:58:51 +0000264
265
266
// nmstart / nmchar: first and subsequent characters of a name
267nmstart = [_a-zA-Z]|{nonascii}|{escape}
david_williamscfdb2cd2004-11-11 08:37:49 +0000268nmchar = [_a-zA-Z0-9-]|{nonascii}|{escape}
// string1 / string2: double- and single-quoted strings; a backslash-newline
// continuation and the other quote character are allowed inside
269string1 = \"([\t !#$%&(-~]|\\{nl}|\'|{nonascii}|{escape})*\"
270string2 = \'([\t !#$%&(-~]|\\{nl}|\"|{nonascii}|{escape})*\'
271
// ident: optional leading "-", one nmstart, then any number of nmchar
david_williams78e4db02006-06-07 22:58:51 +0000272ident = -?{nmstart}{nmchar}*
273
// name: one or more nmchar (may start with a digit, unlike ident)
david_williamscfdb2cd2004-11-11 08:37:49 +0000274name = {nmchar}+
// num: optionally signed integer or decimal (digits before the "." are optional)
275num = [+-]?([0-9]+|[0-9]*"."[0-9]+)
276string = {string1}|{string2}
// url: unquoted URL body (possibly empty) as used inside url(...)
277url = ([ !#$%&*-~]|{nonascii}|{escape})*
// s: one whitespace char; w: optional whitespace; nl: one line break
278s = [ \t\r\n\f]
279w = {s}*
280nl = \n|\r\n|\r|\f
david_williams78e4db02006-06-07 22:58:51 +0000281
282//range = \?{1,6}|{h}(\?{0,5}|{h}(\?{0,4}|{h}(\?{0,3}|{h}(\?{0,2}|{h}(\??|{h})))))
david_williamscfdb2cd2004-11-11 08:37:49 +0000283
// hash: "#" followed by a name
284hash = "#"{name}
// uri: url( ... ) around either a quoted string or an unquoted url body
285uri = ("url("{w}{string}{w}")"|"url("{w}{url}{w}")")
// function: an ident immediately followed by "("
286function = {ident}"("
// unicode_range: "U+" with 1-6 hex digits or "?", optionally "-" and a second such group
287unicode_range = "U"\+[0-9a-fA-F?]{1,6}("-"[0-9a-fA-F?]{1,6})?
288
289%%
290
291/*
292 * *** global ***
293 */
294
// These rules carry no <STATE> prefix: whitespace, the SGML comment
// delimiters, the closing brace and CSS comments.
295{s}+ { return CSS_S; }
296"<!--" { return CSS_CDO; }
297"-->" { return CSS_CDC; }
// "}" always sends the scanner back to YYINITIAL
298"}" { yybegin(YYINITIAL); return CSS_RBRACE; }
// a comment: "/*" up to the first following "*/"
299\/\*[^*]*\*+([^/*][^*]*\*+)*\/ { return CSS_COMMENT; }
300
301//<YYINITIAL> {
302// "@import" { yybegin(ST_IMPORT_URI); return CSS_IMPORT; }
303//}
304
305/*
306 * *** charset rule ***
307 * CHARSET_SYM S* STRING S* ';'
308 */
309
// "@charset" -> expect the charset name string -> expect ";" -> back to YYINITIAL
310"@charset" { yybegin(ST_CHARSET_NAME); return CSS_CHARSET; }
311
312<ST_CHARSET_NAME> {
313 {string} { yybegin(ST_CHARSET_DELIMITER); return CSS_STRING; }
314}
315
316<ST_CHARSET_DELIMITER> {
317 ";" { yybegin(YYINITIAL); return CSS_DELIMITER; }
318}
319
320/*
321 * *** import rule ***
322 * IMPORT_SYM S* [STRING|URI] S* [ medium [ COMMA S* medium]* ]? ';' S*
323 */
324
// "@import" -> string or url(...) -> optional comma-separated media -> ";"
325"@import" { yybegin(ST_IMPORT_URI); return CSS_IMPORT; }
326
// after "@import": the target as a string or a url(...); ";" ends the rule early
327<ST_IMPORT_URI> {
328 {string} { yybegin(ST_IMPORT_MEDIUM); return CSS_STRING; }
329 // "url("{w}{string}{w}")" { yybegin(ST_IMPORT_MEDIUM); return CSS_URI; }
330 // "url("{w}{url}{w}")" { yybegin(ST_IMPORT_MEDIUM); return CSS_URI; }
331 {uri} { yybegin(ST_IMPORT_MEDIUM); return CSS_URI; }
332 ";" { yybegin(YYINITIAL); return CSS_DELIMITER; }
333}
334
// after the target (or after a ","): a medium name, or ";" to end the rule
335<ST_IMPORT_MEDIUM> {
336 {ident} { yybegin(ST_IMPORT_DELIMITER); return CSS_MEDIUM; }
337 ";" { yybegin(YYINITIAL); return CSS_DELIMITER; }
338}
339
// after a medium: ";" ends the rule, "," introduces another medium
340<ST_IMPORT_DELIMITER> {
341 ";" { yybegin(YYINITIAL); return CSS_DELIMITER; }
342 "," { yybegin(ST_IMPORT_MEDIUM); return CSS_MEDIA_SEPARATOR; }
343}
344
345/*
346 * *** media rule ***
347 * MEDIA_SYM S* medium [ COMMA S* medium ]* LBRACE S* ruleset* '}' S*
348 */
349
// "@media" -> medium [ "," medium ]* -> "{"
350"@media" { yybegin(ST_MEDIA_MEDIUM); return CSS_MEDIA; }
351
352/*
353 * medium
354 * IDENT S*
355 */
356<ST_MEDIA_MEDIUM> {
357 {ident} { yybegin(ST_MEDIA_DELIMITER); return CSS_MEDIUM; }
358}
359
// after a medium: "{" opens the block and returns to YYINITIAL (where the
// selector rules apply); "," introduces another medium
360<ST_MEDIA_DELIMITER> {
361 "{" { yybegin(YYINITIAL); return CSS_LBRACE; }
362 "," { yybegin(ST_MEDIA_MEDIUM); return CSS_MEDIA_SEPARATOR; }
363}
364
365/*
366 * *** page rule ***
367 * PAGE_SYM S* pseudo_page? S* LBRACE S* declaration [ ';' S* declaration ]* '}' S*
368 */
369
// "@page" -> optional page selector -> "{" -> declarations
370"@page" { yybegin(ST_PAGE_PSEUDO_PAGE); return CSS_PAGE; }
371
372/*
373 * pseudo_page
374 * ':' IDENT
375 */
376
// the page selector is an ident with an optional leading ":"; "{" may also
// follow "@page" directly
377<ST_PAGE_PSEUDO_PAGE> {
378 ":"?{ident} { yybegin(ST_PAGE_DELIMITER); return CSS_PAGE_SELECTOR; }
379 "{" { yybegin(ST_DECLARATION); return CSS_LBRACE; }
380}
381
// after the page selector only "{" is expected
382<ST_PAGE_DELIMITER> {
383 "{" { yybegin(ST_DECLARATION); return CSS_LBRACE; }
384}
385
386/*
387 * font-face
388 * FONT_FACE_SYM S* '{' S* declaration [ ';' S* declaration ]* '}' S*
389 */
390
// "@font-face" -> "{" -> declarations
391"@font-face" { yybegin(ST_FONT_FACE_DELIMITER); return CSS_FONT_FACE; }
392
393<ST_FONT_FACE_DELIMITER> {
394 "{" { yybegin(ST_DECLARATION); return CSS_LBRACE; }
395}
396
397/*
398 * selector
399 * simple_selector [ combinator simple_selector ]*
400 */
401
402/*
403 * simple_selector
404 * element_name [ HASH | class | attrib | pseudo ]* | [ HASH | class | attrib | pseudo ]+
405 */
406
// Parts of a simple selector that may start one or follow another part:
// "*", #id, :pseudo (optionally with one parenthesised ident argument),
// .class and the "[" that opens an attribute selector.
407<YYINITIAL, ST_SELECTOR_MODIFIER, ST_SELECTOR> {
408 "*" { yybegin(ST_SELECTOR_MODIFIER); return CSS_SELECTOR_UNIVERSAL; }
409 {hash} { yybegin(ST_SELECTOR_MODIFIER); return CSS_SELECTOR_ID; }
410// ":"{ident} { yybegin(ST_SELECTOR_MODIFIER); return CSS_SELECTOR_PSEUDO; }
411 ":"{ident}("("{s}*{ident}{s}*")")? { yybegin(ST_SELECTOR_MODIFIER); return CSS_SELECTOR_PSEUDO; }
412 "."{name} { yybegin(ST_SELECTOR_MODIFIER); return CSS_SELECTOR_CLASS; }
413 "[" { yybegin(ST_SELECTOR_ATTRIBUTE_NAME); return CSS_SELECTOR_ATTRIBUTE_START; }
414}
415
// An element name is only recognised at the start of a simple selector,
// not directly after another selector part (ST_SELECTOR_MODIFIER is excluded).
416<YYINITIAL, ST_SELECTOR> {
417 {ident} { yybegin(ST_SELECTOR_MODIFIER); return CSS_SELECTOR_ELEMENT_NAME; }
418}
419
// After a selector part: "," separates selectors, "+" / ">" / whitespace
// combine simple selectors, "{" opens the declaration block.
420<ST_SELECTOR_MODIFIER> {
421 "," { yybegin(ST_SELECTOR); return CSS_SELECTOR_SEPARATOR; }
422 // using LOOKAHEAD
// whitespace counts as a (descendant) combinator only when the next
// character is none of "+", ">", "{" or "/"
nitindb1aee262008-05-13 16:33:24 +0000423 {s}+/[^+>\{/] { yybegin(ST_SELECTOR); return CSS_SELECTOR_COMBINATOR; }
david_williamscfdb2cd2004-11-11 08:37:49 +0000424 "+"|">" { yybegin(ST_SELECTOR); return CSS_SELECTOR_COMBINATOR; }
425 "{" { yybegin(ST_DECLARATION); return CSS_LBRACE; }
426}
427
428/*
429 * attrib
430 * '[' S* IDENT S* [ [ '=' | INCLUDES | DASHMATCH ] S* [ IDENT | STRING ] S* ]? ']'
431 */
432
// Attribute selector, entered on "[": name -> (operator -> value)? -> "]"
433<ST_SELECTOR_ATTRIBUTE_NAME> {
434 {ident} { yybegin(ST_SELECTOR_ATTRIBUTE_OPERATOR); return CSS_SELECTOR_ATTRIBUTE_NAME; }
435}
436
// after the name: one of the operators "=", "~=", "|=", or "]" to close
// a name-only attribute selector
437<ST_SELECTOR_ATTRIBUTE_OPERATOR> {
438 "="|"~="|"|=" { yybegin(ST_SELECTOR_ATTRIBUTE_VALUE); return CSS_SELECTOR_ATTRIBUTE_OPERATOR; }
439 "]" { yybegin(ST_SELECTOR_MODIFIER); return CSS_SELECTOR_ATTRIBUTE_END; }
440}
441
// the value is an ident or a quoted string
442<ST_SELECTOR_ATTRIBUTE_VALUE> {
443 {ident}|{string} { yybegin(ST_SELECTOR_ATTRIBUTE_END); return CSS_SELECTOR_ATTRIBUTE_VALUE; }
444}
445
// "]" closes the attribute selector; further selector parts may follow
446<ST_SELECTOR_ATTRIBUTE_END> {
447 "]" { yybegin(ST_SELECTOR_MODIFIER); return CSS_SELECTOR_ATTRIBUTE_END; }
448}
449
450/*
451 * declaration
452 * property ':' S* expr prio? | // empty //
453 */
454
// Start of a declaration: the property name, then the ":" separator
455<ST_DECLARATION> {
456 {ident} { yybegin(ST_DECLARATION_SEPARATOR); return CSS_DECLARATION_PROPERTY; }
457}
458
459<ST_DECLARATION_SEPARATOR> {
460 ":" { yybegin(ST_DECLARATION_PRE_VALUE); return CSS_DECLARATION_SEPARATOR; }
461}
462
// Declaration value tokens. Every rule leaves the scanner in
// ST_DECLARATION_VALUE, so a value may consist of several tokens.
463<ST_DECLARATION_PRE_VALUE, ST_DECLARATION_VALUE> {
// "!important", with optional whitespace after the "!"
464 "!"{s}*"important" { yybegin(ST_DECLARATION_VALUE); return CSS_DECLARATION_VALUE_IMPORTANT; }
david_williams78e4db02006-06-07 22:58:51 +0000465
466
// ")" closes a function opened by the {function} rule below
david_williamscfdb2cd2004-11-11 08:37:49 +0000467 ")" { yybegin(ST_DECLARATION_VALUE); return CSS_DECLARATION_VALUE_PARENTHESIS_CLOSE; }
david_williams78e4db02006-06-07 22:58:51 +0000468
469 // ordered following two rules deliberately, see
470 // https://bugs.eclipse.org/bugs/show_bug.cgi?id=129902
// a number immediately followed by an ident (its unit) is a dimension
david_williamscfdb2cd2004-11-11 08:37:49 +0000471 {num}{ident} { yybegin(ST_DECLARATION_VALUE); return CSS_DECLARATION_VALUE_DIMENSION; }
david_williams78e4db02006-06-07 22:58:51 +0000472 {ident} { yybegin(ST_DECLARATION_VALUE); return CSS_DECLARATION_VALUE_IDENT; }
473
474
david_williamscfdb2cd2004-11-11 08:37:49 +0000475 {num}"%" { yybegin(ST_DECLARATION_VALUE); return CSS_DECLARATION_VALUE_PERCENTAGE; }
david_williams78e4db02006-06-07 22:58:51 +0000476
david_williamscfdb2cd2004-11-11 08:37:49 +0000477 {num} { yybegin(ST_DECLARATION_VALUE); return CSS_DECLARATION_VALUE_NUMBER; }
david_williams78e4db02006-06-07 22:58:51 +0000478
479
480
// ident + "(" starts a function; a complete url(...) is a longer match
// and is therefore reported as a URI instead
david_williamscfdb2cd2004-11-11 08:37:49 +0000481 {function} { yybegin(ST_DECLARATION_VALUE); return CSS_DECLARATION_VALUE_FUNCTION; }
482 {string} { yybegin(ST_DECLARATION_VALUE); return CSS_DECLARATION_VALUE_STRING; }
483 {uri} { yybegin(ST_DECLARATION_VALUE); return CSS_DECLARATION_VALUE_URI; }
484 "#"{name} { yybegin(ST_DECLARATION_VALUE); return CSS_DECLARATION_VALUE_HASH; }
485 {unicode_range} { yybegin(ST_DECLARATION_VALUE); return CSS_DECLARATION_VALUE_UNICODE_RANGE; }
// "," and "/" are the value operators
486 [,/] { yybegin(ST_DECLARATION_VALUE); return CSS_DECLARATION_VALUE_OPERATOR; }
487}
488
// Whitespace between value tokens is reported as CSS_DECLARATION_VALUE_S,
// but only when the next character is neither ";" nor "}" (lookahead).
489<ST_DECLARATION_VALUE> {
490 {s}+/[^;}] { return CSS_DECLARATION_VALUE_S; }
491}
492
// ";" ends the current declaration from any declaration state and starts
// the next one.
493<ST_DECLARATION, ST_DECLARATION_SEPARATOR, ST_DECLARATION_PRE_VALUE, ST_DECLARATION_VALUE> {
494 ";" { yybegin(ST_DECLARATION); return CSS_DECLARATION_DELIMITER; }
495 // "}" { yybegin(YYINITIAL); return CSS_RBRACE; }
496}
497
498
499//<YYINITIAL, ST_IMPORT_URI, ST_IMPORT_MEDIUM, ST_IMPORT_DELIMITER> {
500// \/\*[^*]*\*+([^/*][^*]*\*+)*\/ { return CSS_COMMENT; }
501// {s}+ { return CSS_S; }
502// . { return UNDEFINED; }
503//}
504
505//<YYINITIAL, ST_IMPORT_URI, ST_IMPORT_MEDIUM, ST_IMPORT_DELIMITER> {
506// [^ \t\r\n\f]+ { return CSS_UNKNOWN; }
507//}
508
// Catch-all: any single character that no rule above matched is reported
// with the private UNDEFINED marker; getNextToken() merges consecutive
// UNDEFINED tokens and hands them out as one CSS_UNKNOWN region.
509. {
510 return UNDEFINED;
511}