diff options
author | Curtis D'Entremont | 2007-01-10 18:21:20 +0000 |
---|---|---|
committer | Curtis D'Entremont | 2007-01-10 18:21:20 +0000 |
commit | 2924dad7e468b60d8723acd13d008da5cb079baf (patch) | |
tree | 0c3f0ffce2b55da93fd4470dbb247a5c2511ff16 /org.eclipse.help.base | |
parent | 9dff98b761e392f2dd4e1c294e27926c616f5259 (diff) | |
download | eclipse.platform.ua-2924dad7e468b60d8723acd13d008da5cb079baf.tar.gz eclipse.platform.ua-2924dad7e468b60d8723acd13d008da5cb079baf.tar.xz eclipse.platform.ua-2924dad7e468b60d8723acd13d008da5cb079baf.zip |
138383 [Help] Update Lucene and ship as a JAR
Diffstat (limited to 'org.eclipse.help.base')
14 files changed, 4113 insertions, 6 deletions
diff --git a/org.eclipse.help.base/.classpath b/org.eclipse.help.base/.classpath index d876f9193..2e51f1c6d 100644 --- a/org.eclipse.help.base/.classpath +++ b/org.eclipse.help.base/.classpath @@ -2,6 +2,7 @@ <classpath> <classpathentry kind="src" path="src"/> <classpathentry output="bin2" kind="src" path="src_ant"/> + <classpathentry kind="src" path="src_demo"/> <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER/org.eclipse.jdt.internal.debug.ui.launcher.StandardVMType/J2SE-1.4"/> <classpathentry kind="con" path="org.eclipse.pde.core.requiredPlugins"/> <classpathentry kind="output" path="bin"/> diff --git a/org.eclipse.help.base/META-INF/MANIFEST.MF b/org.eclipse.help.base/META-INF/MANIFEST.MF index 576410323..c22a2a1a1 100644 --- a/org.eclipse.help.base/META-INF/MANIFEST.MF +++ b/org.eclipse.help.base/META-INF/MANIFEST.MF @@ -6,7 +6,8 @@ Bundle-Version: 3.3.0.qualifier Bundle-Activator: org.eclipse.help.internal.base.HelpBasePlugin Bundle-Vendor: %providerName Bundle-Localization: plugin -Export-Package: org.eclipse.help.browser, +Export-Package: org.apache.lucene.demo.html;x-friends:="org.eclipse.pde.ui", + org.eclipse.help.browser, org.eclipse.help.internal.base;x-friends:="org.eclipse.help.ui,org.eclipse.help.webapp,org.eclipse.ua.tests", org.eclipse.help.internal.base.remote;x-friends:="org.eclipse.ua.tests,org.eclipse.help.webapp,org.eclipse.help.ui", org.eclipse.help.internal.base.util;x-friends:="org.eclipse.help.ui,org.eclipse.help.webapp,org.eclipse.ua.tests", @@ -20,11 +21,12 @@ Export-Package: org.eclipse.help.browser, org.eclipse.help.internal.xhtml;x-friends:="org.eclipse.help.ui,org.eclipse.ua.tests,org.eclipse.ui.intro,org.eclipse.help.webapp", org.eclipse.help.search, org.eclipse.help.standalone -Require-Bundle: org.apache.lucene;bundle-version="[1.4.3,1.5.0)";visibility:=reexport, - org.eclipse.help;bundle-version="[3.1.0,4.0.0)";visibility:=reexport, - org.eclipse.help.appserver;bundle-version="[3.1.0,4.0.0)", 
+Require-Bundle: org.apache.lucene;bundle-version="[1.4.3,2.0.0)";visibility:=reexport, + org.apache.lucene.analysis;bundle-version="[1.9.1,2.0.0)";resolution:=optional;visibility:=reexport, + org.eclipse.ant.core;bundle-version="[3.1.0,4.0.0)";resolution:=optional, org.eclipse.core.runtime;bundle-version="[3.1.0,4.0.0)", - org.eclipse.ant.core;bundle-version="[3.1.0,4.0.0)";resolution:=optional + org.eclipse.help;bundle-version="[3.1.0,4.0.0)";visibility:=reexport, + org.eclipse.help.appserver;bundle-version="[3.1.0,4.0.0)" Eclipse-LazyStart: true Import-Package: com.ibm.icu.text Bundle-RequiredExecutionEnvironment: J2SE-1.4 diff --git a/org.eclipse.help.base/build.properties b/org.eclipse.help.base/build.properties index 6d59bfb95..9905c7eb7 100644 --- a/org.eclipse.help.base/build.properties +++ b/org.eclipse.help.base/build.properties @@ -8,7 +8,8 @@ # Contributors: # IBM Corporation - initial API and implementation ############################################################################### -source..=src/ +source..=src/,\ + src_demo/ src.includes=schema/,about.html bin.includes = doc/,\ plugin.xml,\ diff --git a/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/Entities.java b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/Entities.java new file mode 100644 index 000000000..348df1cc2 --- /dev/null +++ b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/Entities.java @@ -0,0 +1,327 @@ +package org.apache.lucene.demo.html; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +import java.util.*; + +public class Entities { + static final Hashtable decoder = new Hashtable(300); + static final String[] encoder = new String[0x100]; + + static final String decode(String entity) { + if (entity.charAt(entity.length()-1) == ';') // remove trailing semicolon + entity = entity.substring(0, entity.length()-1); + if (entity.charAt(1) == '#') { + int start = 2; + int radix = 10; + if (entity.charAt(2) == 'X' || entity.charAt(2) == 'x') { + start++; + radix = 16; + } + Character c = + new Character((char)Integer.parseInt(entity.substring(start), radix)); + return c.toString(); + } else { + String s = (String)decoder.get(entity); + if (s != null) + return s; + else return ""; //$NON-NLS-1$ + } + } + + public static final String encode(String s) { + int length = s.length(); + StringBuffer buffer = new StringBuffer(length * 2); + for (int i = 0; i < length; i++) { + char c = s.charAt(i); + int j = (int)c; + if (j < 0x100 && encoder[j] != null) { + buffer.append(encoder[j]); // have a named encoding + buffer.append(';'); + } else if (j < 0x80) { + buffer.append(c); // use ASCII value + } else { + buffer.append("&#"); // use numeric encoding //$NON-NLS-1$ + buffer.append((int)c); + buffer.append(';'); + } + } + return buffer.toString(); + } + + static final void add(String entity, int value) { + decoder.put(entity, (new Character((char)value)).toString()); + if (value < 0x100) + encoder[value] = entity; + } + + static { + add("&nbsp", 160); //$NON-NLS-1$ + add("&iexcl", 161); //$NON-NLS-1$ + add("&cent", 162); //$NON-NLS-1$ + add("&pound", 163); //$NON-NLS-1$ + add("&curren", 164); //$NON-NLS-1$ + add("&yen", 165); //$NON-NLS-1$ + add("&brvbar", 166); //$NON-NLS-1$ + add("&sect", 167); //$NON-NLS-1$ + add("&uml", 168); //$NON-NLS-1$ + add("&copy", 169); //$NON-NLS-1$ + add("&ordf", 170); //$NON-NLS-1$ + add("&laquo", 171); //$NON-NLS-1$ + add("&not", 172); //$NON-NLS-1$ + add("&shy", 173); 
//$NON-NLS-1$ + add("&reg", 174); //$NON-NLS-1$ + add("&macr", 175); //$NON-NLS-1$ + add("&deg", 176); //$NON-NLS-1$ + add("&plusmn", 177); //$NON-NLS-1$ + add("&sup2", 178); //$NON-NLS-1$ + add("&sup3", 179); //$NON-NLS-1$ + add("&acute", 180); //$NON-NLS-1$ + add("&micro", 181); //$NON-NLS-1$ + add("&para", 182); //$NON-NLS-1$ + add("&middot", 183); //$NON-NLS-1$ + add("&cedil", 184); //$NON-NLS-1$ + add("&sup1", 185); //$NON-NLS-1$ + add("&ordm", 186); //$NON-NLS-1$ + add("&raquo", 187); //$NON-NLS-1$ + add("&frac14", 188); //$NON-NLS-1$ + add("&frac12", 189); //$NON-NLS-1$ + add("&frac34", 190); //$NON-NLS-1$ + add("&iquest", 191); //$NON-NLS-1$ + add("&Agrave", 192); //$NON-NLS-1$ + add("&Aacute", 193); //$NON-NLS-1$ + add("&Acirc", 194); //$NON-NLS-1$ + add("&Atilde", 195); //$NON-NLS-1$ + add("&Auml", 196); //$NON-NLS-1$ + add("&Aring", 197); //$NON-NLS-1$ + add("&AElig", 198); //$NON-NLS-1$ + add("&Ccedil", 199); //$NON-NLS-1$ + add("&Egrave", 200); //$NON-NLS-1$ + add("&Eacute", 201); //$NON-NLS-1$ + add("&Ecirc", 202); //$NON-NLS-1$ + add("&Euml", 203); //$NON-NLS-1$ + add("&Igrave", 204); //$NON-NLS-1$ + add("&Iacute", 205); //$NON-NLS-1$ + add("&Icirc", 206); //$NON-NLS-1$ + add("&Iuml", 207); //$NON-NLS-1$ + add("&ETH", 208); //$NON-NLS-1$ + add("&Ntilde", 209); //$NON-NLS-1$ + add("&Ograve", 210); //$NON-NLS-1$ + add("&Oacute", 211); //$NON-NLS-1$ + add("&Ocirc", 212); //$NON-NLS-1$ + add("&Otilde", 213); //$NON-NLS-1$ + add("&Ouml", 214); //$NON-NLS-1$ + add("&times", 215); //$NON-NLS-1$ + add("&Oslash", 216); //$NON-NLS-1$ + add("&Ugrave", 217); //$NON-NLS-1$ + add("&Uacute", 218); //$NON-NLS-1$ + add("&Ucirc", 219); //$NON-NLS-1$ + add("&Uuml", 220); //$NON-NLS-1$ + add("&Yacute", 221); //$NON-NLS-1$ + add("&THORN", 222); //$NON-NLS-1$ + add("&szlig", 223); //$NON-NLS-1$ + add("&agrave", 224); //$NON-NLS-1$ + add("&aacute", 225); //$NON-NLS-1$ + add("&acirc", 226); //$NON-NLS-1$ + add("&atilde", 227); //$NON-NLS-1$ + add("&auml", 228); //$NON-NLS-1$ + add("&aring", 229); //$NON-NLS-1$ + add("&aelig", 230); //$NON-NLS-1$ + add("&ccedil", 231); //$NON-NLS-1$ + add("&egrave", 232); //$NON-NLS-1$ + add("&eacute", 233); //$NON-NLS-1$ + add("&ecirc", 234); //$NON-NLS-1$ + add("&euml", 235); //$NON-NLS-1$ + add("&igrave", 236); //$NON-NLS-1$ + add("&iacute", 237); //$NON-NLS-1$ + 
add("&icirc", 238); //$NON-NLS-1$ + add("&iuml", 239); //$NON-NLS-1$ + add("&eth", 240); //$NON-NLS-1$ + add("&ntilde", 241); //$NON-NLS-1$ + add("&ograve", 242); //$NON-NLS-1$ + add("&oacute", 243); //$NON-NLS-1$ + add("&ocirc", 244); //$NON-NLS-1$ + add("&otilde", 245); //$NON-NLS-1$ + add("&ouml", 246); //$NON-NLS-1$ + add("&divide", 247); //$NON-NLS-1$ + add("&oslash", 248); //$NON-NLS-1$ + add("&ugrave", 249); //$NON-NLS-1$ + add("&uacute", 250); //$NON-NLS-1$ + add("&ucirc", 251); //$NON-NLS-1$ + add("&uuml", 252); //$NON-NLS-1$ + add("&yacute", 253); //$NON-NLS-1$ + add("&thorn", 254); //$NON-NLS-1$ + add("&yuml", 255); //$NON-NLS-1$ + add("&fnof", 402); //$NON-NLS-1$ + add("&Alpha", 913); //$NON-NLS-1$ + add("&Beta", 914); //$NON-NLS-1$ + add("&Gamma", 915); //$NON-NLS-1$ + add("&Delta", 916); //$NON-NLS-1$ + add("&Epsilon",917); //$NON-NLS-1$ + add("&Zeta", 918); //$NON-NLS-1$ + add("&Eta", 919); //$NON-NLS-1$ + add("&Theta", 920); //$NON-NLS-1$ + add("&Iota", 921); //$NON-NLS-1$ + add("&Kappa", 922); //$NON-NLS-1$ + add("&Lambda", 923); //$NON-NLS-1$ + add("&Mu", 924); //$NON-NLS-1$ + add("&Nu", 925); //$NON-NLS-1$ + add("&Xi", 926); //$NON-NLS-1$ + add("&Omicron",927); //$NON-NLS-1$ + add("&Pi", 928); //$NON-NLS-1$ + add("&Rho", 929); //$NON-NLS-1$ + add("&Sigma", 931); //$NON-NLS-1$ + add("&Tau", 932); //$NON-NLS-1$ + add("&Upsilon",933); //$NON-NLS-1$ + add("&Phi", 934); //$NON-NLS-1$ + add("&Chi", 935); //$NON-NLS-1$ + add("&Psi", 936); //$NON-NLS-1$ + add("&Omega", 937); //$NON-NLS-1$ + add("&alpha", 945); //$NON-NLS-1$ + add("&beta", 946); //$NON-NLS-1$ + add("&gamma", 947); //$NON-NLS-1$ + add("&delta", 948); //$NON-NLS-1$ + add("&epsilon",949); //$NON-NLS-1$ + add("&zeta", 950); //$NON-NLS-1$ + add("&eta", 951); //$NON-NLS-1$ + add("&theta", 952); //$NON-NLS-1$ + add("&iota", 953); //$NON-NLS-1$ + add("&kappa", 954); //$NON-NLS-1$ + add("&lambda", 955); //$NON-NLS-1$ + add("&mu", 956); //$NON-NLS-1$ + add("&nu", 957); //$NON-NLS-1$ + add("&xi", 958); //$NON-NLS-1$ + add("&omicron",959); //$NON-NLS-1$ + add("&pi", 960); //$NON-NLS-1$ + 
add("&rho", 961); //$NON-NLS-1$ + add("&sigmaf", 962); //$NON-NLS-1$ + add("&sigma", 963); //$NON-NLS-1$ + add("&tau", 964); //$NON-NLS-1$ + add("&upsilon",965); //$NON-NLS-1$ + add("&phi", 966); //$NON-NLS-1$ + add("&chi", 967); //$NON-NLS-1$ + add("&psi", 968); //$NON-NLS-1$ + add("&omega", 969); //$NON-NLS-1$ + add("&thetasym",977); //$NON-NLS-1$ + add("&upsih", 978); //$NON-NLS-1$ + add("&piv", 982); //$NON-NLS-1$ + add("&bull", 8226); //$NON-NLS-1$ + add("&hellip", 8230); //$NON-NLS-1$ + add("&prime", 8242); //$NON-NLS-1$ + add("&Prime", 8243); //$NON-NLS-1$ + add("&oline", 8254); //$NON-NLS-1$ + add("&frasl", 8260); //$NON-NLS-1$ + add("&weierp", 8472); //$NON-NLS-1$ + add("&image", 8465); //$NON-NLS-1$ + add("&real", 8476); //$NON-NLS-1$ + add("&trade", 8482); //$NON-NLS-1$ + add("&alefsym",8501); //$NON-NLS-1$ + add("&larr", 8592); //$NON-NLS-1$ + add("&uarr", 8593); //$NON-NLS-1$ + add("&rarr", 8594); //$NON-NLS-1$ + add("&darr", 8595); //$NON-NLS-1$ + add("&harr", 8596); //$NON-NLS-1$ + add("&crarr", 8629); //$NON-NLS-1$ + add("&lArr", 8656); //$NON-NLS-1$ + add("&uArr", 8657); //$NON-NLS-1$ + add("&rArr", 8658); //$NON-NLS-1$ + add("&dArr", 8659); //$NON-NLS-1$ + add("&hArr", 8660); //$NON-NLS-1$ + add("&forall", 8704); //$NON-NLS-1$ + add("&part", 8706); //$NON-NLS-1$ + add("&exist", 8707); //$NON-NLS-1$ + add("&empty", 8709); //$NON-NLS-1$ + add("&nabla", 8711); //$NON-NLS-1$ + add("&isin", 8712); //$NON-NLS-1$ + add("&notin", 8713); //$NON-NLS-1$ + add("&ni", 8715); //$NON-NLS-1$ + add("&prod", 8719); //$NON-NLS-1$ + add("&sum", 8721); //$NON-NLS-1$ + add("&minus", 8722); //$NON-NLS-1$ + add("&lowast", 8727); //$NON-NLS-1$ + add("&radic", 8730); //$NON-NLS-1$ + add("&prop", 8733); //$NON-NLS-1$ + add("&infin", 8734); //$NON-NLS-1$ + add("&ang", 8736); //$NON-NLS-1$ + add("&and", 8743); //$NON-NLS-1$ + add("&or", 8744); //$NON-NLS-1$ + add("&cap", 8745); //$NON-NLS-1$ + add("&cup", 8746); //$NON-NLS-1$ + add("&int", 8747); //$NON-NLS-1$ + add("&there4", 
8756); //$NON-NLS-1$ + add("&sim", 8764); //$NON-NLS-1$ + add("&cong", 8773); //$NON-NLS-1$ + add("&asymp", 8776); //$NON-NLS-1$ + add("&ne", 8800); //$NON-NLS-1$ + add("&equiv", 8801); //$NON-NLS-1$ + add("&le", 8804); //$NON-NLS-1$ + add("&ge", 8805); //$NON-NLS-1$ + add("&sub", 8834); //$NON-NLS-1$ + add("&sup", 8835); //$NON-NLS-1$ + add("&nsub", 8836); //$NON-NLS-1$ + add("&sube", 8838); //$NON-NLS-1$ + add("&supe", 8839); //$NON-NLS-1$ + add("&oplus", 8853); //$NON-NLS-1$ + add("&otimes", 8855); //$NON-NLS-1$ + add("&perp", 8869); //$NON-NLS-1$ + add("&sdot", 8901); //$NON-NLS-1$ + add("&lceil", 8968); //$NON-NLS-1$ + add("&rceil", 8969); //$NON-NLS-1$ + add("&lfloor", 8970); //$NON-NLS-1$ + add("&rfloor", 8971); //$NON-NLS-1$ + add("&lang", 9001); //$NON-NLS-1$ + add("&rang", 9002); //$NON-NLS-1$ + add("&loz", 9674); //$NON-NLS-1$ + add("&spades", 9824); //$NON-NLS-1$ + add("&clubs", 9827); //$NON-NLS-1$ + add("&hearts", 9829); //$NON-NLS-1$ + add("&diams", 9830); //$NON-NLS-1$ + add("&quot", 34); //$NON-NLS-1$ + add("&amp", 38); //$NON-NLS-1$ + add("&lt", 60); //$NON-NLS-1$ + add("&gt", 62); //$NON-NLS-1$ + add("&OElig", 338); //$NON-NLS-1$ + add("&oelig", 339); //$NON-NLS-1$ + add("&Scaron", 352); //$NON-NLS-1$ + add("&scaron", 353); //$NON-NLS-1$ + add("&Yuml", 376); //$NON-NLS-1$ + add("&circ", 710); //$NON-NLS-1$ + add("&tilde", 732); //$NON-NLS-1$ + add("&ensp", 8194); //$NON-NLS-1$ + add("&emsp", 8195); //$NON-NLS-1$ + add("&thinsp", 8201); //$NON-NLS-1$ + add("&zwnj", 8204); //$NON-NLS-1$ + add("&zwj", 8205); //$NON-NLS-1$ + add("&lrm", 8206); //$NON-NLS-1$ + add("&rlm", 8207); //$NON-NLS-1$ + add("&ndash", 8211); //$NON-NLS-1$ + add("&mdash", 8212); //$NON-NLS-1$ + add("&lsquo", 8216); //$NON-NLS-1$ + add("&rsquo", 8217); //$NON-NLS-1$ + add("&sbquo", 8218); //$NON-NLS-1$ + add("&ldquo", 8220); //$NON-NLS-1$ + add("&rdquo", 8221); //$NON-NLS-1$ + add("&bdquo", 8222); //$NON-NLS-1$ + add("&dagger", 8224); //$NON-NLS-1$ + add("&Dagger", 8225); //$NON-NLS-1$ + 
add("&permil", 8240); //$NON-NLS-1$ + add("&lsaquo", 8249); //$NON-NLS-1$ + add("&rsaquo", 8250); //$NON-NLS-1$ + add("&euro", 8364); //$NON-NLS-1$ + + } +} diff --git a/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParser.java b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParser.java new file mode 100644 index 000000000..bfcc2e91a --- /dev/null +++ b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParser.java @@ -0,0 +1,748 @@ +/* Generated By:JavaCC: Do not edit this line. HTMLParser.java */ +package org.apache.lucene.demo.html; + +import java.io.*; +import java.util.Properties; + +public class HTMLParser implements HTMLParserConstants { + public static int SUMMARY_LENGTH = 200; + + StringBuffer title = new StringBuffer(SUMMARY_LENGTH); + StringBuffer summary = new StringBuffer(SUMMARY_LENGTH * 2); + Properties metaTags=new Properties(); + String currentMetaTag=null; + String currentMetaContent=null; + int length = 0; + boolean titleComplete = false; + boolean inTitle = false; + boolean inMetaTag = false; + boolean inStyle = false; + boolean afterTag = false; + boolean afterSpace = false; + String eol = System.getProperty("line.separator"); //$NON-NLS-1$ + Reader pipeIn = null; + Writer pipeOut; + private MyPipedInputStream pipeInStream = null; + private PipedOutputStream pipeOutStream = null; + + private class MyPipedInputStream extends PipedInputStream{ + + public MyPipedInputStream(){ + super(); + } + + public MyPipedInputStream(PipedOutputStream src) throws IOException{ + super(src); + } + + public boolean full() throws IOException{ + return this.available() >= PipedInputStream.PIPE_SIZE; + } + } + + /** + * @deprecated Use HTMLParser(FileInputStream) instead + */ + public HTMLParser(File file) throws FileNotFoundException { + this(new FileInputStream(file)); + } + + public String getTitle() throws IOException, InterruptedException { + if (pipeIn == null) + getReader(); // spawn parsing thread + while (true) 
{ + synchronized(this) { + if (titleComplete || pipeInStream.full()) + break; + wait(10); + } + } + return title.toString().trim(); + } + + public Properties getMetaTags() throws IOException, +InterruptedException { + if (pipeIn == null) + getReader(); // spawn parsing thread + while (true) { + synchronized(this) { + if (titleComplete || pipeInStream.full()) + break; + wait(10); + } + } + return metaTags; + } + + + public String getSummary() throws IOException, InterruptedException { + if (pipeIn == null) + getReader(); // spawn parsing thread + while (true) { + synchronized(this) { + if (summary.length() >= SUMMARY_LENGTH || pipeInStream.full()) + break; + wait(10); + } + } + if (summary.length() > SUMMARY_LENGTH) + summary.setLength(SUMMARY_LENGTH); + + String sum = summary.toString().trim(); + String tit = getTitle(); + if (sum.startsWith(tit)) // don't repeat title in summary + return sum.substring(tit.length()).trim(); + else + return sum; + } + + public Reader getReader() throws IOException { + if (pipeIn == null) { + pipeInStream = new MyPipedInputStream(); + pipeOutStream = new PipedOutputStream(pipeInStream); + pipeIn = new InputStreamReader(pipeInStream, "UTF-16BE"); //$NON-NLS-1$ + pipeOut = new OutputStreamWriter(pipeOutStream, "UTF-16BE"); //$NON-NLS-1$ + + Thread thread = new ParserThread(this); + thread.start(); // start parsing + } + + return pipeIn; + } + + void addToSummary(String text) { + if (summary.length() < SUMMARY_LENGTH) { + summary.append(text); + if (summary.length() >= SUMMARY_LENGTH) { + synchronized(this) { + notifyAll(); + } + } + } + } + + void addText(String text) throws IOException { + if (inStyle) + return; + if (inTitle) + title.append(text); + else { + addToSummary(text); + if (!titleComplete && !title.equals("")) { // finished title //$NON-NLS-1$ + synchronized(this) { + titleComplete = true; // tell waiting threads + notifyAll(); + } + } + } + + length += text.length(); + pipeOut.write(text); + + afterSpace = false; + } + + 
void addMetaTag() throws IOException { + metaTags.setProperty(currentMetaTag, currentMetaContent); + if (currentMetaTag.equalsIgnoreCase("keywords")) { //$NON-NLS-1$ + pipeOut.write(currentMetaContent); + } + currentMetaTag = null; + currentMetaContent = null; + return; + } + + void addSpace() throws IOException { + if (!afterSpace) { + if (inTitle) + title.append(" "); //$NON-NLS-1$ + else + addToSummary(" "); //$NON-NLS-1$ + + String space = afterTag ? eol : " "; //$NON-NLS-1$ + length += space.length(); + pipeOut.write(space); + afterSpace = true; + } + } + + final public void HTMLDocument() throws ParseException, IOException { + Token t; + label_1: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case ScriptStart: + case TagName: + case DeclName: + case Comment1: + case Comment2: + case Word: + case Entity: + case Space: + case Punct: + ; + break; + default: + jj_la1[0] = jj_gen; + break label_1; + } + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case TagName: + Tag(); + afterTag = true; + break; + case DeclName: + t = Decl(); + afterTag = true; + break; + case Comment1: + case Comment2: + CommentTag(); + afterTag = true; + break; + case ScriptStart: + ScriptTag(); + afterTag = true; + break; + case Word: + t = jj_consume_token(Word); + addText(t.image); afterTag = false; + break; + case Entity: + t = jj_consume_token(Entity); + addText(Entities.decode(t.image)); afterTag = false; + break; + case Punct: + t = jj_consume_token(Punct); + addText(t.image); afterTag = false; + break; + case Space: + jj_consume_token(Space); + addSpace(); afterTag = false; + break; + default: + jj_la1[1] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + } + jj_consume_token(0); + } + + final public void Tag() throws ParseException, IOException { + Token t1, t2; + boolean inImg = false; + t1 = jj_consume_token(TagName); + String tagName = t1.image.toLowerCase(); + if(Tags.WS_ELEMS.contains(tagName) ) { + addSpace(); + } + inTitle = 
tagName.equalsIgnoreCase("<title"); // keep track if in <TITLE> //$NON-NLS-1$ + inMetaTag = tagName.equalsIgnoreCase("<META"); // keep track if in <META> //$NON-NLS-1$ + inStyle = tagName.equalsIgnoreCase("<STYLE"); // keep track if in <STYLE> //$NON-NLS-1$ + inImg = tagName.equalsIgnoreCase("<img"); // keep track if in <IMG> //$NON-NLS-1$ + + label_2: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case ArgName: + ; + break; + default: + jj_la1[2] = jj_gen; + break label_2; + } + t1 = jj_consume_token(ArgName); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case ArgEquals: + jj_consume_token(ArgEquals); + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case ArgValue: + case ArgQuote1: + case ArgQuote2: + t2 = ArgValue(); + if (inImg && t1.image.equalsIgnoreCase("alt") && t2 != null) //$NON-NLS-1$ + addText("[" + t2.image + "]"); //$NON-NLS-1$ //$NON-NLS-2$ + + if(inMetaTag && + ( t1.image.equalsIgnoreCase("name") || //$NON-NLS-1$ + t1.image.equalsIgnoreCase("HTTP-EQUIV") //$NON-NLS-1$ + ) + && t2 != null) + { + currentMetaTag=t2.image.toLowerCase(); + if(currentMetaTag != null && currentMetaContent != null) { + addMetaTag(); + } + } + if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 != //$NON-NLS-1$ +null) + { + currentMetaContent=t2.image.toLowerCase(); + if(currentMetaTag != null && currentMetaContent != null) { + addMetaTag(); + } + } + break; + default: + jj_la1[3] = jj_gen; + ; + } + break; + default: + jj_la1[4] = jj_gen; + ; + } + } + jj_consume_token(TagEnd); + } + + final public Token ArgValue() throws ParseException { + Token t = null; + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case ArgValue: + t = jj_consume_token(ArgValue); + {if (true) return t;} + break; + default: + jj_la1[5] = jj_gen; + if (jj_2_1(2)) { + jj_consume_token(ArgQuote1); + jj_consume_token(CloseQuote1); + {if (true) return t;} + } else { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case ArgQuote1: + jj_consume_token(ArgQuote1); + t = jj_consume_token(Quote1Text); + 
jj_consume_token(CloseQuote1); + {if (true) return t;} + break; + default: + jj_la1[6] = jj_gen; + if (jj_2_2(2)) { + jj_consume_token(ArgQuote2); + jj_consume_token(CloseQuote2); + {if (true) return t;} + } else { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case ArgQuote2: + jj_consume_token(ArgQuote2); + t = jj_consume_token(Quote2Text); + jj_consume_token(CloseQuote2); + {if (true) return t;} + break; + default: + jj_la1[7] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + } + } + } + } + throw new Error("Missing return statement in function"); //$NON-NLS-1$ + } + + final public Token Decl() throws ParseException { + Token t; + t = jj_consume_token(DeclName); + label_3: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case ArgName: + case ArgEquals: + case ArgValue: + case ArgQuote1: + case ArgQuote2: + ; + break; + default: + jj_la1[8] = jj_gen; + break label_3; + } + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case ArgName: + jj_consume_token(ArgName); + break; + case ArgValue: + case ArgQuote1: + case ArgQuote2: + ArgValue(); + break; + case ArgEquals: + jj_consume_token(ArgEquals); + break; + default: + jj_la1[9] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + } + jj_consume_token(TagEnd); + {if (true) return t;} + throw new Error("Missing return statement in function"); //$NON-NLS-1$ + } + + final public void CommentTag() throws ParseException { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case Comment1: + jj_consume_token(Comment1); + label_4: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case CommentText1: + ; + break; + default: + jj_la1[10] = jj_gen; + break label_4; + } + jj_consume_token(CommentText1); + } + jj_consume_token(CommentEnd1); + break; + case Comment2: + jj_consume_token(Comment2); + label_5: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case CommentText2: + ; + break; + default: + jj_la1[11] = jj_gen; + break label_5; + } + jj_consume_token(CommentText2); + } 
+ jj_consume_token(CommentEnd2); + break; + default: + jj_la1[12] = jj_gen; + jj_consume_token(-1); + throw new ParseException(); + } + } + + final public void ScriptTag() throws ParseException { + jj_consume_token(ScriptStart); + label_6: + while (true) { + switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { + case ScriptText: + ; + break; + default: + jj_la1[13] = jj_gen; + break label_6; + } + jj_consume_token(ScriptText); + } + jj_consume_token(ScriptEnd); + } + + final private boolean jj_2_1(int xla) { + jj_la = xla; jj_lastpos = jj_scanpos = token; + try { return !jj_3_1(); } + catch(LookaheadSuccess ls) { return true; } + finally { jj_save(0, xla); } + } + + final private boolean jj_2_2(int xla) { + jj_la = xla; jj_lastpos = jj_scanpos = token; + try { return !jj_3_2(); } + catch(LookaheadSuccess ls) { return true; } + finally { jj_save(1, xla); } + } + + final private boolean jj_3_2() { + if (jj_scan_token(ArgQuote2)) return true; + if (jj_scan_token(CloseQuote2)) return true; + return false; + } + + final private boolean jj_3_1() { + if (jj_scan_token(ArgQuote1)) return true; + if (jj_scan_token(CloseQuote1)) return true; + return false; + } + + public HTMLParserTokenManager token_source; + SimpleCharStream jj_input_stream; + public Token token, jj_nt; + private int jj_ntk; + private Token jj_scanpos, jj_lastpos; + private int jj_la; + public boolean lookingAhead = false; + private int jj_gen; + final private int[] jj_la1 = new int[14]; + static private int[] jj_la1_0; + static { + jj_la1_0(); + } + private static void jj_la1_0() { + jj_la1_0 = new int[] {0x2c7e,0x2c7e,0x10000,0x380000,0x20000,0x80000,0x100000,0x200000,0x3b0000,0x3b0000,0x8000000,0x20000000,0x30,0x4000,}; + } + final private JJCalls[] jj_2_rtns = new JJCalls[2]; + private boolean jj_rescan = false; + private int jj_gc = 0; + + public HTMLParser(java.io.InputStream stream) { + this(stream, null); + } + public HTMLParser(java.io.InputStream stream, String encoding) { + try { jj_input_stream = new 
SimpleCharStream(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); } + token_source = new HTMLParserTokenManager(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 14; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + public void ReInit(java.io.InputStream stream) { + ReInit(stream, null); + } + public void ReInit(java.io.InputStream stream, String encoding) { + try { jj_input_stream.ReInit(stream, encoding, 1, 1); } catch(java.io.UnsupportedEncodingException e) { throw new RuntimeException(e); } + token_source.ReInit(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 14; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + public HTMLParser(java.io.Reader stream) { + jj_input_stream = new SimpleCharStream(stream, 1, 1); + token_source = new HTMLParserTokenManager(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 14; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + public void ReInit(java.io.Reader stream) { + jj_input_stream.ReInit(stream, 1, 1); + token_source.ReInit(jj_input_stream); + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 14; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + public HTMLParser(HTMLParserTokenManager tm) { + token_source = tm; + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 14; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls(); + } + + public void ReInit(HTMLParserTokenManager tm) { + token_source = tm; + token = new Token(); + jj_ntk = -1; + jj_gen = 0; + for (int i = 0; i < 14; i++) jj_la1[i] = -1; + for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] 
= new JJCalls(); + } + + final private Token jj_consume_token(int kind) throws ParseException { + Token oldToken; + if ((oldToken = token).next != null) token = token.next; + else token = token.next = token_source.getNextToken(); + jj_ntk = -1; + if (token.kind == kind) { + jj_gen++; + if (++jj_gc > 100) { + jj_gc = 0; + for (int i = 0; i < jj_2_rtns.length; i++) { + JJCalls c = jj_2_rtns[i]; + while (c != null) { + if (c.gen < jj_gen) c.first = null; + c = c.next; + } + } + } + return token; + } + token = oldToken; + jj_kind = kind; + throw generateParseException(); + } + + static private final class LookaheadSuccess extends java.lang.Error { + private static final long serialVersionUID = 1L; + } + final private LookaheadSuccess jj_ls = new LookaheadSuccess(); + final private boolean jj_scan_token(int kind) { + if (jj_scanpos == jj_lastpos) { + jj_la--; + if (jj_scanpos.next == null) { + jj_lastpos = jj_scanpos = jj_scanpos.next = token_source.getNextToken(); + } else { + jj_lastpos = jj_scanpos = jj_scanpos.next; + } + } else { + jj_scanpos = jj_scanpos.next; + } + if (jj_rescan) { + int i = 0; Token tok = token; + while (tok != null && tok != jj_scanpos) { i++; tok = tok.next; } + if (tok != null) jj_add_error_token(kind, i); + } + if (jj_scanpos.kind != kind) return true; + if (jj_la == 0 && jj_scanpos == jj_lastpos) throw jj_ls; + return false; + } + + final public Token getNextToken() { + if (token.next != null) token = token.next; + else token = token.next = token_source.getNextToken(); + jj_ntk = -1; + jj_gen++; + return token; + } + + final public Token getToken(int index) { + Token t = lookingAhead ? 
jj_scanpos : token; + for (int i = 0; i < index; i++) { + if (t.next != null) t = t.next; + else t = t.next = token_source.getNextToken(); + } + return t; + } + + final private int jj_ntk() { + if ((jj_nt=token.next) == null) + return (jj_ntk = (token.next=token_source.getNextToken()).kind); + else + return (jj_ntk = jj_nt.kind); + } + + private java.util.Vector jj_expentries = new java.util.Vector(); + private int[] jj_expentry; + private int jj_kind = -1; + private int[] jj_lasttokens = new int[100]; + private int jj_endpos; + + private void jj_add_error_token(int kind, int pos) { + if (pos >= 100) return; + if (pos == jj_endpos + 1) { + jj_lasttokens[jj_endpos++] = kind; + } else if (jj_endpos != 0) { + jj_expentry = new int[jj_endpos]; + for (int i = 0; i < jj_endpos; i++) { + jj_expentry[i] = jj_lasttokens[i]; + } + boolean exists = false; + for (java.util.Enumeration e = jj_expentries.elements(); e.hasMoreElements();) { + int[] oldentry = (int[])(e.nextElement()); + if (oldentry.length == jj_expentry.length) { + exists = true; + for (int i = 0; i < jj_expentry.length; i++) { + if (oldentry[i] != jj_expentry[i]) { + exists = false; + break; + } + } + if (exists) break; + } + } + if (!exists) jj_expentries.addElement(jj_expentry); + if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind; + } + } + + public ParseException generateParseException() { + jj_expentries.removeAllElements(); + boolean[] la1tokens = new boolean[31]; + for (int i = 0; i < 31; i++) { + la1tokens[i] = false; + } + if (jj_kind >= 0) { + la1tokens[jj_kind] = true; + jj_kind = -1; + } + for (int i = 0; i < 14; i++) { + if (jj_la1[i] == jj_gen) { + for (int j = 0; j < 32; j++) { + if ((jj_la1_0[i] & (1<<j)) != 0) { + la1tokens[j] = true; + } + } + } + } + for (int i = 0; i < 31; i++) { + if (la1tokens[i]) { + jj_expentry = new int[1]; + jj_expentry[0] = i; + jj_expentries.addElement(jj_expentry); + } + } + jj_endpos = 0; + jj_rescan_token(); + jj_add_error_token(0, 0); + int[][] exptokseq = 
new int[jj_expentries.size()][]; + for (int i = 0; i < jj_expentries.size(); i++) { + exptokseq[i] = (int[])jj_expentries.elementAt(i); + } + return new ParseException(token, exptokseq, tokenImage); + } + + final public void enable_tracing() { + } + + final public void disable_tracing() { + } + + final private void jj_rescan_token() { + jj_rescan = true; + for (int i = 0; i < 2; i++) { + try { + JJCalls p = jj_2_rtns[i]; + do { + if (p.gen > jj_gen) { + jj_la = p.arg; jj_lastpos = jj_scanpos = p.first; + switch (i) { + case 0: jj_3_1(); break; + case 1: jj_3_2(); break; + } + } + p = p.next; + } while (p != null); + } catch(LookaheadSuccess ls) { } + } + jj_rescan = false; + } + + final private void jj_save(int index, int xla) { + JJCalls p = jj_2_rtns[index]; + while (p.gen > jj_gen) { + if (p.next == null) { p = p.next = new JJCalls(); break; } + p = p.next; + } + p.gen = jj_gen + xla - jj_la; p.first = token; p.arg = xla; + } + + static final class JJCalls { + int gen; + Token first; + int arg; + JJCalls next; + } + +// void handleException(Exception e) { +// System.out.println(e.toString()); // print the error message +// System.out.println("Skipping..."); +// Token t; +// do { +// t = getNextToken(); +// } while (t.kind != TagEnd); +// } +} diff --git a/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParser.jj b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParser.jj new file mode 100644 index 000000000..38c844f94 --- /dev/null +++ b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParser.jj @@ -0,0 +1,407 @@ +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Copyright (c) 2003, 2006 IBM Corp. + * All rights reserved. + * + * Contributors: + * Apache Software Foundation - Initial contribution + * Konrad Kolosowski, IBM - skipping XML decl, merging meta keywords + * content with main text, encoding read and write to piped stream, + * returning summary if starts with title. + * Curtis d'Entremont, IBM - added missing serialVersionUIDs, removed + * unused imports, labels, variables, methods from javacc *generated* + * code, strip title off summary. + */ + +// HTMLParser.jj + +options { + STATIC = false; + OPTIMIZE_TOKEN_MANAGER = true; + //DEBUG_LOOKAHEAD = true; + //DEBUG_TOKEN_MANAGER = true; + UNICODE_INPUT = true; +} + +PARSER_BEGIN(HTMLParser) + +package org.apache.lucene.demo.html; + +import java.io.*; +import java.util.Properties; + +public class HTMLParser { + public static int SUMMARY_LENGTH = 200; + + StringBuffer title = new StringBuffer(SUMMARY_LENGTH); + StringBuffer summary = new StringBuffer(SUMMARY_LENGTH * 2); + Properties metaTags=new Properties(); + String currentMetaTag=null; + String currentMetaContent=null; + int length = 0; + boolean titleComplete = false; + boolean inTitle = false; + boolean inMetaTag = false; + boolean inStyle = false; + boolean afterTag = false; + boolean afterSpace = false; + String eol = System.getProperty("line.separator"); + Reader pipeIn = null; + Writer pipeOut; + private MyPipedInputStream pipeInStream = null; + private PipedOutputStream pipeOutStream = null; + + private class MyPipedInputStream extends PipedInputStream{ + + public 
MyPipedInputStream(){ + super(); + } + + public MyPipedInputStream(PipedOutputStream src) throws IOException{ + super(src); + } + + public boolean full() throws IOException{ + return this.available() >= PipedInputStream.PIPE_SIZE; + } + } + + /** + * @deprecated Use HTMLParser(FileInputStream) instead + */ + public HTMLParser(File file) throws FileNotFoundException { + this(new FileInputStream(file)); + } + + public String getTitle() throws IOException, InterruptedException { + if (pipeIn == null) + getReader(); // spawn parsing thread + while (true) { + synchronized(this) { + if (titleComplete || pipeInStream.full()) + break; + wait(10); + } + } + return title.toString().trim(); + } + + public Properties getMetaTags() throws IOException, +InterruptedException { + if (pipeIn == null) + getReader(); // spawn parsing thread + while (true) { + synchronized(this) { + if (titleComplete || pipeInStream.full()) + break; + wait(10); + } + } + return metaTags; + } + + + public String getSummary() throws IOException, InterruptedException { + if (pipeIn == null) + getReader(); // spawn parsing thread + while (true) { + synchronized(this) { + if (summary.length() >= SUMMARY_LENGTH || pipeInStream.full()) + break; + wait(10); + } + } + if (summary.length() > SUMMARY_LENGTH) + summary.setLength(SUMMARY_LENGTH); + + String sum = summary.toString().trim(); + String tit = getTitle(); + if (sum.startsWith(tit)) // don't repeat title in summary + return sum.substring(tit.length()).trim(); + else + return sum; + } + + public Reader getReader() throws IOException { + if (pipeIn == null) { + pipeInStream = new MyPipedInputStream(); + pipeOutStream = new PipedOutputStream(pipeInStream); + pipeIn = new InputStreamReader(pipeInStream, "UTF-16BE"); + pipeOut = new OutputStreamWriter(pipeOutStream, "UTF-16BE"); + + Thread thread = new ParserThread(this); + thread.start(); // start parsing + } + + return pipeIn; + } + + void addToSummary(String text) { + if (summary.length() < SUMMARY_LENGTH) 
{ + summary.append(text); + if (summary.length() >= SUMMARY_LENGTH) { + synchronized(this) { + notifyAll(); + } + } + } + } + + void addText(String text) throws IOException { + if (inStyle) + return; + if (inTitle) + title.append(text); + else { + addToSummary(text); + if (!titleComplete && !title.equals("")) { // finished title + synchronized(this) { + titleComplete = true; // tell waiting threads + notifyAll(); + } + } + } + + length += text.length(); + pipeOut.write(text); + + afterSpace = false; + } + + void addMetaTag() throws IOException { + metaTags.setProperty(currentMetaTag, currentMetaContent); + if (currentMetaTag.equalsIgnoreCase("keywords")) { + pipeOut.write(currentMetaContent); + } + currentMetaTag = null; + currentMetaContent = null; + return; + } + + void addSpace() throws IOException { + if (!afterSpace) { + if (inTitle) + title.append(" "); + else + addToSummary(" "); + + String space = afterTag ? eol : " "; + length += space.length(); + pipeOut.write(space); + afterSpace = true; + } + } + +// void handleException(Exception e) { +// System.out.println(e.toString()); // print the error message +// System.out.println("Skipping..."); +// Token t; +// do { +// t = getNextToken(); +// } while (t.kind != TagEnd); +// } +} + +PARSER_END(HTMLParser) + + +void HTMLDocument() throws IOException : +{ + Token t; +} +{ +// try { + ( Tag() { afterTag = true; } + | t=Decl() { afterTag = true; } + | CommentTag() { afterTag = true; } + | ScriptTag() { afterTag = true; } + | t=<Word> { addText(t.image); afterTag = false; } + | t=<Entity> { addText(Entities.decode(t.image)); afterTag = false; } + | t=<Punct> { addText(t.image); afterTag = false; } + | <Space> { addSpace(); afterTag = false; } + )* <EOF> +// } catch (ParseException e) { +// handleException(e); +// } +} + +void Tag() throws IOException : +{ + Token t1, t2; + boolean inImg = false; +} +{ + t1=<TagName> { + String tagName = t1.image.toLowerCase(); + if(Tags.WS_ELEMS.contains(tagName) ) { + addSpace(); 
+ } + inTitle = tagName.equalsIgnoreCase("<title"); // keep track if in <TITLE> + inMetaTag = tagName.equalsIgnoreCase("<META"); // keep track if in <META> + inStyle = tagName.equalsIgnoreCase("<STYLE"); // keep track if in <STYLE> + inImg = tagName.equalsIgnoreCase("<img"); // keep track if in <IMG> + } + (t1=<ArgName> + (<ArgEquals> + (t2=ArgValue() // save ALT text in IMG tag + { + if (inImg && t1.image.equalsIgnoreCase("alt") && t2 != null) + addText("[" + t2.image + "]"); + + if(inMetaTag && + ( t1.image.equalsIgnoreCase("name") || + t1.image.equalsIgnoreCase("HTTP-EQUIV") + ) + && t2 != null) + { + currentMetaTag=t2.image.toLowerCase(); + if(currentMetaTag != null && currentMetaContent != null) { + addMetaTag(); + } + } + if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 != +null) + { + currentMetaContent=t2.image.toLowerCase(); + if(currentMetaTag != null && currentMetaContent != null) { + addMetaTag(); + } + } + } + )? + )? + )* + <TagEnd> +} + +Token ArgValue() : +{ + Token t = null; +} +{ + t=<ArgValue> { return t; } +| LOOKAHEAD(2) + <ArgQuote1> <CloseQuote1> { return t; } +| <ArgQuote1> t=<Quote1Text> <CloseQuote1> { return t; } +| LOOKAHEAD(2) + <ArgQuote2> <CloseQuote2> { return t; } +| <ArgQuote2> t=<Quote2Text> <CloseQuote2> { return t; } +} + + +Token Decl() : +{ + Token t; +} +{ + t=<DeclName> ( <ArgName> | ArgValue() | <ArgEquals> )* <TagEnd> + { return t; } +} + + +void CommentTag() : +{} +{ + (<Comment1> ( <CommentText1> )* <CommentEnd1>) + | + (<Comment2> ( <CommentText2> )* <CommentEnd2>) +} + +void ScriptTag() : +{} +{ + <ScriptStart> ( <ScriptText> )* <ScriptEnd> +} + + +TOKEN : +{ + < ScriptStart: "<script" > : WithinScript +| < TagName: "<" ("/")? ["A"-"Z","a"-"z"] (<ArgName>)? > : WithinTag +| < DeclName: "<" "!" ["A"-"Z","a"-"z"] (<ArgName>)? > : WithinTag + +| < Comment1: "<!--" > : WithinComment1 +| < Comment2: "<!" > : WithinComment2 + +| < Word: ( <LET> | <LET> (["+","/"])+ | <NUM> ["\""] | + <LET> ["-","'"] <LET> | ("$")? 
<NUM> [",","."] <NUM> )+ > +| < #LET: ["A"-"Z","a"-"z","0"-"9"] > +| < #NUM: ["0"-"9"] > +| < #HEX: ["0"-"9","A"-"F","a"-"f"] > + +| < Entity: ( "&" (["A"-"Z","a"-"z"])+ (";")? | "&" "#" (<NUM>)+ (";")? | "&" "#" ["X","x"] (<HEX>)+ (";")? ) > + +| < Space: (<SP>)+ > +| < #SP: [" ","\t","\r","\n"] > + +| < Punct: ~[] > // Keep this last. It is a catch-all. +} + +<WithinScript> TOKEN: +{ + < ScriptText: (~["<",">"])+ | "<" | ">" > +| < ScriptEnd: "</script" (~["<",">"])* ">" > : DEFAULT +} + +<WithinTag> TOKEN: +{ + < ArgName: (~[" ","\t","\r","\n","=",">","'","\""]) + (~[" ","\t","\r","\n","=",">"])* > +| < ArgEquals: "=" > : AfterEquals +| < TagEnd: ">" | "=>" > : DEFAULT +} + +<AfterEquals> TOKEN: +{ + < ArgValue: (~[" ","\t","\r","\n","=",">","'","\""]) + (~[" ","\t","\r","\n",">"])* > : WithinTag +} + +<WithinTag, AfterEquals> TOKEN: +{ + < ArgQuote1: "'" > : WithinQuote1 +| < ArgQuote2: "\"" > : WithinQuote2 +} + +<WithinTag, AfterEquals> SKIP: +{ + < <Space> > +} + +<WithinQuote1> TOKEN: +{ + < Quote1Text: (~["'"])+ > +| < CloseQuote1: <ArgQuote1> > : WithinTag +} + +<WithinQuote2> TOKEN: +{ + < Quote2Text: (~["\""])+ > +| < CloseQuote2: <ArgQuote2> > : WithinTag +} + + +<WithinComment1> TOKEN : +{ + < CommentText1: (~["-"])+ | "-" > +| < CommentEnd1: "-->" > : DEFAULT +} + +<WithinComment2> TOKEN : +{ + < CommentText2: (~[">"])+ > +| < CommentEnd2: ">" > : DEFAULT +} diff --git a/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParserConstants.java b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParserConstants.java new file mode 100644 index 000000000..6bec4bf1f --- /dev/null +++ b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParserConstants.java @@ -0,0 +1,80 @@ +/* Generated By:JavaCC: Do not edit this line. 
HTMLParserConstants.java */ +package org.apache.lucene.demo.html; + +public interface HTMLParserConstants { + + int EOF = 0; + int ScriptStart = 1; + int TagName = 2; + int DeclName = 3; + int Comment1 = 4; + int Comment2 = 5; + int Word = 6; + int LET = 7; + int NUM = 8; + int HEX = 9; + int Entity = 10; + int Space = 11; + int SP = 12; + int Punct = 13; + int ScriptText = 14; + int ScriptEnd = 15; + int ArgName = 16; + int ArgEquals = 17; + int TagEnd = 18; + int ArgValue = 19; + int ArgQuote1 = 20; + int ArgQuote2 = 21; + int Quote1Text = 23; + int CloseQuote1 = 24; + int Quote2Text = 25; + int CloseQuote2 = 26; + int CommentText1 = 27; + int CommentEnd1 = 28; + int CommentText2 = 29; + int CommentEnd2 = 30; + + int DEFAULT = 0; + int WithinScript = 1; + int WithinTag = 2; + int AfterEquals = 3; + int WithinQuote1 = 4; + int WithinQuote2 = 5; + int WithinComment1 = 6; + int WithinComment2 = 7; + + String[] tokenImage = { + "<EOF>", //$NON-NLS-1$ + "\"<script\"", //$NON-NLS-1$ + "<TagName>", //$NON-NLS-1$ + "<DeclName>", //$NON-NLS-1$ + "\"<!--\"", //$NON-NLS-1$ + "\"<!\"", //$NON-NLS-1$ + "<Word>", //$NON-NLS-1$ + "<LET>", //$NON-NLS-1$ + "<NUM>", //$NON-NLS-1$ + "<HEX>", //$NON-NLS-1$ + "<Entity>", //$NON-NLS-1$ + "<Space>", //$NON-NLS-1$ + "<SP>", //$NON-NLS-1$ + "<Punct>", //$NON-NLS-1$ + "<ScriptText>", //$NON-NLS-1$ + "<ScriptEnd>", //$NON-NLS-1$ + "<ArgName>", //$NON-NLS-1$ + "\"=\"", //$NON-NLS-1$ + "<TagEnd>", //$NON-NLS-1$ + "<ArgValue>", //$NON-NLS-1$ + "\"\\\'\"", //$NON-NLS-1$ + "\"\\\"\"", //$NON-NLS-1$ + "<token of kind 22>", //$NON-NLS-1$ + "<Quote1Text>", //$NON-NLS-1$ + "<CloseQuote1>", //$NON-NLS-1$ + "<Quote2Text>", //$NON-NLS-1$ + "<CloseQuote2>", //$NON-NLS-1$ + "<CommentText1>", //$NON-NLS-1$ + "\"-->\"", //$NON-NLS-1$ + "<CommentText2>", //$NON-NLS-1$ + "\">\"", //$NON-NLS-1$ + }; + +} diff --git a/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParserTokenManager.java 
b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParserTokenManager.java new file mode 100644 index 000000000..048d0e2ad --- /dev/null +++ b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/HTMLParserTokenManager.java @@ -0,0 +1,1580 @@ +/* Generated By:JavaCC: Do not edit this line. HTMLParserTokenManager.java */ +package org.apache.lucene.demo.html; + +public class HTMLParserTokenManager implements HTMLParserConstants +{ + public java.io.PrintStream debugStream = System.out; + public void setDebugStream(java.io.PrintStream ds) { debugStream = ds; } +private final int jjStopStringLiteralDfa_0(int pos, long active0) +{ + switch (pos) + { + case 0: + if ((active0 & 0x32L) != 0L) + return 20; + return -1; + case 1: + if ((active0 & 0x2L) != 0L) + { + if (jjmatchedPos != 1) + { + jjmatchedKind = 2; + jjmatchedPos = 1; + } + return 22; + } + if ((active0 & 0x30L) != 0L) + return 25; + return -1; + case 2: + if ((active0 & 0x2L) != 0L) + { + jjmatchedKind = 2; + jjmatchedPos = 2; + return 23; + } + return -1; + case 3: + if ((active0 & 0x2L) != 0L) + { + jjmatchedKind = 2; + jjmatchedPos = 3; + return 23; + } + return -1; + case 4: + if ((active0 & 0x2L) != 0L) + { + jjmatchedKind = 2; + jjmatchedPos = 4; + return 23; + } + return -1; + case 5: + if ((active0 & 0x2L) != 0L) + { + jjmatchedKind = 2; + jjmatchedPos = 5; + return 23; + } + return -1; + default : + return -1; + } +} +private final int jjStartNfa_0(int pos, long active0) +{ + return jjMoveNfa_0(jjStopStringLiteralDfa_0(pos, active0), pos + 1); +} +private final int jjStopAtPos(int pos, int kind) +{ + jjmatchedKind = kind; + jjmatchedPos = pos; + return pos + 1; +} +private final int jjStartNfaWithStates_0(int pos, int kind, int state) +{ + jjmatchedKind = kind; + jjmatchedPos = pos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return pos + 1; } + return jjMoveNfa_0(state, pos + 1); +} +private final int jjMoveStringLiteralDfa0_0() +{ + 
switch(curChar) + { + case 60: + return jjMoveStringLiteralDfa1_0(0x32L); + default : + return jjMoveNfa_0(11, 0); + } +} +private final int jjMoveStringLiteralDfa1_0(long active0) +{ + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(0, active0); + return 1; + } + switch(curChar) + { + case 33: + if ((active0 & 0x20L) != 0L) + { + jjmatchedKind = 5; + jjmatchedPos = 1; + } + return jjMoveStringLiteralDfa2_0(active0, 0x10L); + case 115: + return jjMoveStringLiteralDfa2_0(active0, 0x2L); + default : + break; + } + return jjStartNfa_0(0, active0); +} +private final int jjMoveStringLiteralDfa2_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(0, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(1, active0); + return 2; + } + switch(curChar) + { + case 45: + return jjMoveStringLiteralDfa3_0(active0, 0x10L); + case 99: + return jjMoveStringLiteralDfa3_0(active0, 0x2L); + default : + break; + } + return jjStartNfa_0(1, active0); +} +private final int jjMoveStringLiteralDfa3_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(1, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(2, active0); + return 3; + } + switch(curChar) + { + case 45: + if ((active0 & 0x10L) != 0L) + return jjStopAtPos(3, 4); + break; + case 114: + return jjMoveStringLiteralDfa4_0(active0, 0x2L); + default : + break; + } + return jjStartNfa_0(2, active0); +} +private final int jjMoveStringLiteralDfa4_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(2, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(3, active0); + return 4; + } + switch(curChar) + { + case 105: + return jjMoveStringLiteralDfa5_0(active0, 0x2L); + default : + break; + } + return jjStartNfa_0(3, 
active0); +} +private final int jjMoveStringLiteralDfa5_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(3, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(4, active0); + return 5; + } + switch(curChar) + { + case 112: + return jjMoveStringLiteralDfa6_0(active0, 0x2L); + default : + break; + } + return jjStartNfa_0(4, active0); +} +private final int jjMoveStringLiteralDfa6_0(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_0(4, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_0(5, active0); + return 6; + } + switch(curChar) + { + case 116: + if ((active0 & 0x2L) != 0L) + return jjStartNfaWithStates_0(6, 1, 23); + break; + default : + break; + } + return jjStartNfa_0(5, active0); +} +private final void jjCheckNAdd(int state) +{ + if (jjrounds[state] != jjround) + { + jjstateSet[jjnewStateCnt++] = state; + jjrounds[state] = jjround; + } +} +private final void jjAddStates(int start, int end) +{ + do { + jjstateSet[jjnewStateCnt++] = jjnextStates[start]; + } while (start++ != end); +} +private final void jjCheckNAddTwoStates(int state1, int state2) +{ + jjCheckNAdd(state1); + jjCheckNAdd(state2); +} +private final void jjCheckNAddStates(int start, int end) +{ + do { + jjCheckNAdd(jjnextStates[start]); + } while (start++ != end); +} +static final long[] jjbitVec0 = { + 0xfffffffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL +}; +static final long[] jjbitVec2 = { + 0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL +}; +private final int jjMoveNfa_0(int startState, int curPos) +{ + int startsAt = 0; + jjnewStateCnt = 28; + int i = 1; + jjstateSet[0] = startState; + int kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + do + { + switch(jjstateSet[--i]) + { + case 20: + if 
(curChar == 33) + jjstateSet[jjnewStateCnt++] = 25; + else if (curChar == 47) + jjCheckNAdd(21); + break; + case 11: + if ((0x3ff000000000000L & l) != 0L) + jjCheckNAddTwoStates(7, 2); + else if ((0x100002600L & l) != 0L) + { + if (kind > 11) + kind = 11; + jjCheckNAdd(10); + } + else if (curChar == 60) + jjCheckNAddStates(0, 2); + else if (curChar == 38) + jjAddStates(3, 5); + else if (curChar == 36) + jjstateSet[jjnewStateCnt++] = 1; + if ((0x3ff000000000000L & l) != 0L) + { + if (kind > 6) + kind = 6; + jjCheckNAddStates(6, 10); + } + break; + case 0: + if (curChar == 36) + jjstateSet[jjnewStateCnt++] = 1; + break; + case 1: + if ((0x3ff000000000000L & l) != 0L) + jjCheckNAdd(2); + break; + case 2: + if ((0x500000000000L & l) != 0L) + jjstateSet[jjnewStateCnt++] = 3; + break; + case 3: + case 9: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 6) + kind = 6; + jjCheckNAddStates(11, 13); + break; + case 4: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 6) + kind = 6; + jjCheckNAddStates(6, 10); + break; + case 5: + if ((0x880000000000L & l) == 0L) + break; + if (kind > 6) + kind = 6; + jjCheckNAddStates(14, 17); + break; + case 6: + if ((0x3ff000000000000L & l) != 0L) + jjCheckNAddTwoStates(7, 2); + break; + case 7: + if (curChar != 34) + break; + if (kind > 6) + kind = 6; + jjCheckNAddStates(11, 13); + break; + case 8: + if ((0x208000000000L & l) != 0L) + jjstateSet[jjnewStateCnt++] = 9; + break; + case 10: + if ((0x100002600L & l) == 0L) + break; + kind = 11; + jjCheckNAdd(10); + break; + case 13: + if (curChar == 59 && kind > 10) + kind = 10; + break; + case 14: + if (curChar == 35) + jjCheckNAdd(15); + break; + case 15: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 10) + kind = 10; + jjCheckNAddTwoStates(15, 13); + break; + case 16: + if (curChar == 35) + jjstateSet[jjnewStateCnt++] = 17; + break; + case 18: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 10) + kind = 10; + jjCheckNAddTwoStates(18, 13); + 
break; + case 19: + if (curChar == 60) + jjCheckNAddStates(0, 2); + break; + case 22: + if ((0x9fffff7affffd9ffL & l) == 0L) + break; + if (kind > 2) + kind = 2; + jjCheckNAdd(23); + break; + case 23: + if ((0x9ffffffeffffd9ffL & l) == 0L) + break; + if (kind > 2) + kind = 2; + jjCheckNAdd(23); + break; + case 24: + if (curChar == 33) + jjstateSet[jjnewStateCnt++] = 25; + break; + case 26: + if ((0x9fffff7affffd9ffL & l) == 0L) + break; + if (kind > 3) + kind = 3; + jjCheckNAdd(27); + break; + case 27: + if ((0x9ffffffeffffd9ffL & l) == 0L) + break; + if (kind > 3) + kind = 3; + jjCheckNAdd(27); + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) + { + long l = 1L << (curChar & 077); + do + { + switch(jjstateSet[--i]) + { + case 20: + case 21: + if ((0x7fffffe07fffffeL & l) == 0L) + break; + if (kind > 2) + kind = 2; + jjstateSet[jjnewStateCnt++] = 22; + break; + case 11: + case 4: + if ((0x7fffffe07fffffeL & l) == 0L) + break; + if (kind > 6) + kind = 6; + jjCheckNAddStates(6, 10); + break; + case 9: + if ((0x7fffffe07fffffeL & l) == 0L) + break; + if (kind > 6) + kind = 6; + jjCheckNAddStates(11, 13); + break; + case 12: + if ((0x7fffffe07fffffeL & l) == 0L) + break; + if (kind > 10) + kind = 10; + jjCheckNAddTwoStates(12, 13); + break; + case 17: + if ((0x100000001000000L & l) != 0L) + jjCheckNAdd(18); + break; + case 18: + if ((0x7e0000007eL & l) == 0L) + break; + if (kind > 10) + kind = 10; + jjCheckNAddTwoStates(18, 13); + break; + case 22: + case 23: + if (kind > 2) + kind = 2; + jjCheckNAdd(23); + break; + case 25: + if ((0x7fffffe07fffffeL & l) == 0L) + break; + if (kind > 3) + kind = 3; + jjstateSet[jjnewStateCnt++] = 26; + break; + case 26: + case 27: + if (kind > 3) + kind = 3; + jjCheckNAdd(27); + break; + default : break; + } + } while(i != startsAt); + } + else + { + int hiByte = (int)(curChar >> 8); + int i1 = hiByte >> 6; + long l1 = 1L << (hiByte & 077); + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << 
(curChar & 077); + do + { + switch(jjstateSet[--i]) + { + case 22: + case 23: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + break; + if (kind > 2) + kind = 2; + jjCheckNAdd(23); + break; + case 26: + case 27: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + break; + if (kind > 3) + kind = 3; + jjCheckNAdd(27); + break; + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 28 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } +} +private final int jjMoveStringLiteralDfa0_5() +{ + return jjMoveNfa_5(1, 0); +} +private final int jjMoveNfa_5(int startState, int curPos) +{ + int startsAt = 0; + jjnewStateCnt = 2; + int i = 1; + jjstateSet[0] = startState; + int kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + do + { + switch(jjstateSet[--i]) + { + case 1: + if ((0xfffffffbffffffffL & l) != 0L) + { + if (kind > 25) + kind = 25; + jjCheckNAdd(0); + } + else if (curChar == 34) + { + if (kind > 26) + kind = 26; + } + break; + case 0: + if ((0xfffffffbffffffffL & l) == 0L) + break; + kind = 25; + jjCheckNAdd(0); + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) + { + do + { + switch(jjstateSet[--i]) + { + case 1: + case 0: + kind = 25; + jjCheckNAdd(0); + break; + default : break; + } + } while(i != startsAt); + } + else + { + int hiByte = (int)(curChar >> 8); + int i1 = hiByte >> 6; + long l1 = 1L << (hiByte & 077); + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + do + { + switch(jjstateSet[--i]) + { + case 1: + case 0: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + break; + if (kind > 25) + kind = 25; + jjCheckNAdd(0); + break; + default : break; + } + } while(i != startsAt); + } + if 
(kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 2 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } +} +private final int jjMoveStringLiteralDfa0_7() +{ + switch(curChar) + { + case 62: + return jjStopAtPos(0, 30); + default : + return jjMoveNfa_7(0, 0); + } +} +private final int jjMoveNfa_7(int startState, int curPos) +{ + int startsAt = 0; + jjnewStateCnt = 1; + int i = 1; + jjstateSet[0] = startState; + int kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + do + { + switch(jjstateSet[--i]) + { + case 0: + if ((0xbfffffffffffffffL & l) == 0L) + break; + kind = 29; + jjstateSet[jjnewStateCnt++] = 0; + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) + { + do + { + switch(jjstateSet[--i]) + { + case 0: + kind = 29; + jjstateSet[jjnewStateCnt++] = 0; + break; + default : break; + } + } while(i != startsAt); + } + else + { + int hiByte = (int)(curChar >> 8); + int i1 = hiByte >> 6; + long l1 = 1L << (hiByte & 077); + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + do + { + switch(jjstateSet[--i]) + { + case 0: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + break; + if (kind > 29) + kind = 29; + jjstateSet[jjnewStateCnt++] = 0; + break; + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 1 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } +} +private final int jjMoveStringLiteralDfa0_4() +{ + return jjMoveNfa_4(1, 0); +} +private final int jjMoveNfa_4(int startState, int curPos) +{ 
+ int startsAt = 0; + jjnewStateCnt = 2; + int i = 1; + jjstateSet[0] = startState; + int kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + do + { + switch(jjstateSet[--i]) + { + case 1: + if ((0xffffff7fffffffffL & l) != 0L) + { + if (kind > 23) + kind = 23; + jjCheckNAdd(0); + } + else if (curChar == 39) + { + if (kind > 24) + kind = 24; + } + break; + case 0: + if ((0xffffff7fffffffffL & l) == 0L) + break; + kind = 23; + jjCheckNAdd(0); + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) + { + do + { + switch(jjstateSet[--i]) + { + case 1: + case 0: + kind = 23; + jjCheckNAdd(0); + break; + default : break; + } + } while(i != startsAt); + } + else + { + int hiByte = (int)(curChar >> 8); + int i1 = hiByte >> 6; + long l1 = 1L << (hiByte & 077); + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + do + { + switch(jjstateSet[--i]) + { + case 1: + case 0: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + break; + if (kind > 23) + kind = 23; + jjCheckNAdd(0); + break; + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 2 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } +} +private final int jjMoveStringLiteralDfa0_3() +{ + switch(curChar) + { + case 34: + return jjStopAtPos(0, 21); + case 39: + return jjStopAtPos(0, 20); + default : + return jjMoveNfa_3(0, 0); + } +} +private final int jjMoveNfa_3(int startState, int curPos) +{ + int startsAt = 0; + jjnewStateCnt = 3; + int i = 1; + jjstateSet[0] = startState; + int kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + do + { + switch(jjstateSet[--i]) + 
{ + case 0: + if ((0x9fffff7affffd9ffL & l) != 0L) + { + if (kind > 19) + kind = 19; + jjCheckNAdd(1); + } + else if ((0x100002600L & l) != 0L) + { + if (kind > 22) + kind = 22; + jjCheckNAdd(2); + } + break; + case 1: + if ((0xbffffffeffffd9ffL & l) == 0L) + break; + if (kind > 19) + kind = 19; + jjCheckNAdd(1); + break; + case 2: + if ((0x100002600L & l) == 0L) + break; + kind = 22; + jjCheckNAdd(2); + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) + { + do + { + switch(jjstateSet[--i]) + { + case 0: + case 1: + if (kind > 19) + kind = 19; + jjCheckNAdd(1); + break; + default : break; + } + } while(i != startsAt); + } + else + { + int hiByte = (int)(curChar >> 8); + int i1 = hiByte >> 6; + long l1 = 1L << (hiByte & 077); + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + do + { + switch(jjstateSet[--i]) + { + case 0: + case 1: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + break; + if (kind > 19) + kind = 19; + jjCheckNAdd(1); + break; + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 3 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } +} +private final int jjStopStringLiteralDfa_6(int pos, long active0) +{ + switch (pos) + { + case 0: + if ((active0 & 0x10000000L) != 0L) + { + jjmatchedKind = 27; + return -1; + } + return -1; + case 1: + if ((active0 & 0x10000000L) != 0L) + { + if (jjmatchedPos == 0) + { + jjmatchedKind = 27; + jjmatchedPos = 0; + } + return -1; + } + return -1; + default : + return -1; + } +} +private final int jjStartNfa_6(int pos, long active0) +{ + return jjMoveNfa_6(jjStopStringLiteralDfa_6(pos, active0), pos + 1); +} +private final int jjMoveStringLiteralDfa0_6() +{ + switch(curChar) + { + case 45: + return 
jjMoveStringLiteralDfa1_6(0x10000000L); + default : + return jjMoveNfa_6(1, 0); + } +} +private final int jjMoveStringLiteralDfa1_6(long active0) +{ + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_6(0, active0); + return 1; + } + switch(curChar) + { + case 45: + return jjMoveStringLiteralDfa2_6(active0, 0x10000000L); + default : + break; + } + return jjStartNfa_6(0, active0); +} +private final int jjMoveStringLiteralDfa2_6(long old0, long active0) +{ + if (((active0 &= old0)) == 0L) + return jjStartNfa_6(0, old0); + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_6(1, active0); + return 2; + } + switch(curChar) + { + case 62: + if ((active0 & 0x10000000L) != 0L) + return jjStopAtPos(2, 28); + break; + default : + break; + } + return jjStartNfa_6(1, active0); +} +private final int jjMoveNfa_6(int startState, int curPos) +{ + int startsAt = 0; + jjnewStateCnt = 2; + int i = 1; + jjstateSet[0] = startState; + int kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + do + { + switch(jjstateSet[--i]) + { + case 1: + if ((0xffffdfffffffffffL & l) != 0L) + { + if (kind > 27) + kind = 27; + jjCheckNAdd(0); + } + else if (curChar == 45) + { + if (kind > 27) + kind = 27; + } + break; + case 0: + if ((0xffffdfffffffffffL & l) == 0L) + break; + kind = 27; + jjCheckNAdd(0); + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) + { + do + { + switch(jjstateSet[--i]) + { + case 1: + case 0: + kind = 27; + jjCheckNAdd(0); + break; + default : break; + } + } while(i != startsAt); + } + else + { + int hiByte = (int)(curChar >> 8); + int i1 = hiByte >> 6; + long l1 = 1L << (hiByte & 077); + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + do + { + switch(jjstateSet[--i]) + { + case 1: + case 0: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + 
break; + if (kind > 27) + kind = 27; + jjCheckNAdd(0); + break; + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 2 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } +} +private final int jjMoveStringLiteralDfa0_1() +{ + return jjMoveNfa_1(1, 0); +} +private final int jjMoveNfa_1(int startState, int curPos) +{ + int startsAt = 0; + jjnewStateCnt = 12; + int i = 1; + jjstateSet[0] = startState; + int kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + do + { + switch(jjstateSet[--i]) + { + case 1: + if ((0xafffffffffffffffL & l) != 0L) + { + if (kind > 14) + kind = 14; + jjCheckNAdd(0); + } + else if ((0x5000000000000000L & l) != 0L) + { + if (kind > 14) + kind = 14; + } + if (curChar == 60) + jjstateSet[jjnewStateCnt++] = 10; + break; + case 0: + if ((0xafffffffffffffffL & l) == 0L) + break; + if (kind > 14) + kind = 14; + jjCheckNAdd(0); + break; + case 3: + if ((0xafffffffffffffffL & l) != 0L) + jjAddStates(18, 19); + break; + case 4: + if (curChar == 62 && kind > 15) + kind = 15; + break; + case 10: + if (curChar == 47) + jjstateSet[jjnewStateCnt++] = 9; + break; + case 11: + if (curChar == 60) + jjstateSet[jjnewStateCnt++] = 10; + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) + { + do + { + switch(jjstateSet[--i]) + { + case 1: + case 0: + if (kind > 14) + kind = 14; + jjCheckNAdd(0); + break; + case 2: + if (curChar == 116) + jjCheckNAddTwoStates(3, 4); + break; + case 3: + jjCheckNAddTwoStates(3, 4); + break; + case 5: + if (curChar == 112) + jjstateSet[jjnewStateCnt++] = 2; + break; + case 6: + if (curChar == 105) + jjstateSet[jjnewStateCnt++] = 5; + break; + case 7: + if (curChar == 
114) + jjstateSet[jjnewStateCnt++] = 6; + break; + case 8: + if (curChar == 99) + jjstateSet[jjnewStateCnt++] = 7; + break; + case 9: + if (curChar == 115) + jjstateSet[jjnewStateCnt++] = 8; + break; + default : break; + } + } while(i != startsAt); + } + else + { + int hiByte = (int)(curChar >> 8); + int i1 = hiByte >> 6; + long l1 = 1L << (hiByte & 077); + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + do + { + switch(jjstateSet[--i]) + { + case 1: + case 0: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + break; + if (kind > 14) + kind = 14; + jjCheckNAdd(0); + break; + case 3: + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) + jjAddStates(18, 19); + break; + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 12 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } +} +private final int jjStartNfaWithStates_2(int pos, int kind, int state) +{ + jjmatchedKind = kind; + jjmatchedPos = pos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return pos + 1; } + return jjMoveNfa_2(state, pos + 1); +} +private final int jjMoveStringLiteralDfa0_2() +{ + switch(curChar) + { + case 34: + return jjStopAtPos(0, 21); + case 39: + return jjStopAtPos(0, 20); + case 61: + return jjStartNfaWithStates_2(0, 17, 3); + default : + return jjMoveNfa_2(0, 0); + } +} +private final int jjMoveNfa_2(int startState, int curPos) +{ + int startsAt = 0; + jjnewStateCnt = 6; + int i = 1; + jjstateSet[0] = startState; + int kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + do + { + switch(jjstateSet[--i]) + { + case 0: + if ((0x9fffff7affffd9ffL & l) != 0L) + { + if (kind > 16) + kind = 16; + jjCheckNAdd(1); + } + else if 
((0x100002600L & l) != 0L) + { + if (kind > 22) + kind = 22; + jjCheckNAdd(5); + } + else if (curChar == 61) + jjstateSet[jjnewStateCnt++] = 3; + else if (curChar == 62) + { + if (kind > 18) + kind = 18; + } + break; + case 1: + if ((0x9ffffffeffffd9ffL & l) == 0L) + break; + if (kind > 16) + kind = 16; + jjCheckNAdd(1); + break; + case 2: + case 3: + if (curChar == 62 && kind > 18) + kind = 18; + break; + case 4: + if (curChar == 61) + jjstateSet[jjnewStateCnt++] = 3; + break; + case 5: + if ((0x100002600L & l) == 0L) + break; + kind = 22; + jjCheckNAdd(5); + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) + { + do + { + switch(jjstateSet[--i]) + { + case 0: + case 1: + if (kind > 16) + kind = 16; + jjCheckNAdd(1); + break; + default : break; + } + } while(i != startsAt); + } + else + { + int hiByte = (int)(curChar >> 8); + int i1 = hiByte >> 6; + long l1 = 1L << (hiByte & 077); + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + do + { + switch(jjstateSet[--i]) + { + case 0: + case 1: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + break; + if (kind > 16) + kind = 16; + jjCheckNAdd(1); + break; + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 6 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } +} +static final int[] jjnextStates = { + 20, 21, 24, 12, 14, 16, 5, 8, 0, 4, 6, 0, 4, 6, 5, 0, + 4, 6, 3, 4, +}; +private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2) +{ + switch(hiByte) + { + case 0: + return ((jjbitVec2[i2] & l2) != 0L); + default : + if ((jjbitVec0[i1] & l1) != 0L) + return true; + return false; + } +} +public static final String[] jjstrLiteralImages = { +"", "\74\163\143\162\151\160\164", null, null, 
"\74\41\55\55", "\74\41", null, //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ +null, null, null, null, null, null, null, null, null, null, "\75", null, null, //$NON-NLS-1$ +"\47", "\42", null, null, null, null, null, null, "\55\55\76", null, "\76", }; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ +public static final String[] lexStateNames = { + "DEFAULT", //$NON-NLS-1$ + "WithinScript", //$NON-NLS-1$ + "WithinTag", //$NON-NLS-1$ + "AfterEquals", //$NON-NLS-1$ + "WithinQuote1", //$NON-NLS-1$ + "WithinQuote2", //$NON-NLS-1$ + "WithinComment1", //$NON-NLS-1$ + "WithinComment2", //$NON-NLS-1$ +}; +public static final int[] jjnewLexState = { + -1, 1, 2, 2, 6, 7, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, 3, 0, 2, 4, 5, -1, -1, 2, + -1, 2, -1, 0, -1, 0, +}; +static final long[] jjtoToken = { + 0x7fbfec7fL, +}; +static final long[] jjtoSkip = { + 0x400000L, +}; +protected SimpleCharStream input_stream; +private final int[] jjrounds = new int[28]; +private final int[] jjstateSet = new int[56]; +protected char curChar; +public HTMLParserTokenManager(SimpleCharStream stream){ + if (SimpleCharStream.staticFlag) + throw new Error("ERROR: Cannot use a static CharStream class with a non-static lexical analyzer."); //$NON-NLS-1$ + input_stream = stream; +} +public HTMLParserTokenManager(SimpleCharStream stream, int lexState){ + this(stream); + SwitchTo(lexState); +} +public void ReInit(SimpleCharStream stream) +{ + jjmatchedPos = jjnewStateCnt = 0; + curLexState = defaultLexState; + input_stream = stream; + ReInitRounds(); +} +private final void ReInitRounds() +{ + int i; + jjround = 0x80000001; + for (i = 28; i-- > 0;) + jjrounds[i] = 0x80000000; +} +public void ReInit(SimpleCharStream stream, int lexState) +{ + ReInit(stream); + SwitchTo(lexState); +} +public void SwitchTo(int lexState) +{ + if (lexState >= 8 || lexState < 0) + throw new TokenMgrError("Error: Ignoring invalid lexical state : " + lexState + ". 
State unchanged.", TokenMgrError.INVALID_LEXICAL_STATE); //$NON-NLS-1$ //$NON-NLS-2$ + else + curLexState = lexState; +} + +protected Token jjFillToken() +{ + Token t = Token.newToken(jjmatchedKind); + t.kind = jjmatchedKind; + String im = jjstrLiteralImages[jjmatchedKind]; + t.image = (im == null) ? input_stream.GetImage() : im; + t.beginLine = input_stream.getBeginLine(); + t.beginColumn = input_stream.getBeginColumn(); + t.endLine = input_stream.getEndLine(); + t.endColumn = input_stream.getEndColumn(); + return t; +} + +int curLexState = 0; +int defaultLexState = 0; +int jjnewStateCnt; +int jjround; +int jjmatchedPos; +int jjmatchedKind; + +public Token getNextToken() +{ + Token matchedToken; + int curPos = 0; + + EOFLoop : + for (;;) + { + try + { + curChar = input_stream.BeginToken(); + } + catch(java.io.IOException e) + { + jjmatchedKind = 0; + matchedToken = jjFillToken(); + return matchedToken; + } + + switch(curLexState) + { + case 0: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_0(); + if (jjmatchedPos == 0 && jjmatchedKind > 13) + { + jjmatchedKind = 13; + } + break; + case 1: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_1(); + break; + case 2: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_2(); + break; + case 3: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_3(); + break; + case 4: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_4(); + break; + case 5: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_5(); + break; + case 6: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_6(); + break; + case 7: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_7(); + break; + } + if (jjmatchedKind != 0x7fffffff) + { + if (jjmatchedPos + 1 < curPos) + 
input_stream.backup(curPos - jjmatchedPos - 1); + if ((jjtoToken[jjmatchedKind >> 6] & (1L << (jjmatchedKind & 077))) != 0L) + { + matchedToken = jjFillToken(); + if (jjnewLexState[jjmatchedKind] != -1) + curLexState = jjnewLexState[jjmatchedKind]; + return matchedToken; + } + else + { + if (jjnewLexState[jjmatchedKind] != -1) + curLexState = jjnewLexState[jjmatchedKind]; + continue EOFLoop; + } + } + int error_line = input_stream.getEndLine(); + int error_column = input_stream.getEndColumn(); + String error_after = null; + boolean EOFSeen = false; + try { input_stream.readChar(); input_stream.backup(1); } + catch (java.io.IOException e1) { + EOFSeen = true; + error_after = curPos <= 1 ? "" : input_stream.GetImage(); //$NON-NLS-1$ + if (curChar == '\n' || curChar == '\r') { + error_line++; + error_column = 0; + } + else + error_column++; + } + if (!EOFSeen) { + input_stream.backup(1); + error_after = curPos <= 1 ? "" : input_stream.GetImage(); //$NON-NLS-1$ + } + throw new TokenMgrError(EOFSeen, curLexState, error_line, error_column, error_after, curChar, TokenMgrError.LEXICAL_ERROR); + } +} + +} diff --git a/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/ParseException.java b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/ParseException.java new file mode 100644 index 000000000..513d68b3f --- /dev/null +++ b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/ParseException.java @@ -0,0 +1,194 @@ +/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 3.0 */ +package org.apache.lucene.demo.html; + +/** + * This exception is thrown when parse errors are encountered. + * You can explicitly create objects of this exception type by + * calling the method generateParseException in the generated + * parser. + * + * You can modify this class to customize your error reporting + * mechanisms so long as you retain the public fields. 
+ */ +public class ParseException extends Exception { + + private static final long serialVersionUID = 1L; + + /** + * This constructor is used by the method "generateParseException" + * in the generated parser. Calling this constructor generates + * a new object of this type with the fields "currentToken", + * "expectedTokenSequences", and "tokenImage" set. The boolean + * flag "specialConstructor" is also set to true to indicate that + * this constructor was used to create this object. + * This constructor calls its super class with the empty string + * to force the "toString" method of parent class "Throwable" to + * print the error message in the form: + * ParseException: <result of getMessage> + */ + public ParseException(Token currentTokenVal, + int[][] expectedTokenSequencesVal, + String[] tokenImageVal + ) + { + super(""); //$NON-NLS-1$ + specialConstructor = true; + currentToken = currentTokenVal; + expectedTokenSequences = expectedTokenSequencesVal; + tokenImage = tokenImageVal; + } + + /** + * The following constructors are for use by you for whatever + * purpose you can think of. Constructing the exception in this + * manner makes the exception behave in the normal way - i.e., as + * documented in the class "Throwable". The fields "errorToken", + * "expectedTokenSequences", and "tokenImage" do not contain + * relevant information. The JavaCC generated code does not use + * these constructors. + */ + + public ParseException() { + super(); + specialConstructor = false; + } + + public ParseException(String message) { + super(message); + specialConstructor = false; + } + + /** + * This variable determines which constructor was used to create + * this object and thereby affects the semantics of the + * "getMessage" method (see below). + */ + protected boolean specialConstructor; + + /** + * This is the last token that has been consumed successfully. 
If + * this object has been created due to a parse error, the token + * followng this token will (therefore) be the first error token. + */ + public Token currentToken; + + /** + * Each entry in this array is an array of integers. Each array + * of integers represents a sequence of tokens (by their ordinal + * values) that is expected at this point of the parse. + */ + public int[][] expectedTokenSequences; + + /** + * This is a reference to the "tokenImage" array of the generated + * parser within which the parse error occurred. This array is + * defined in the generated ...Constants interface. + */ + public String[] tokenImage; + + /** + * This method has the standard behavior when this object has been + * created using the standard constructors. Otherwise, it uses + * "currentToken" and "expectedTokenSequences" to generate a parse + * error message and returns it. If this object has been created + * due to a parse error, and you do not catch it (it gets thrown + * from the parser), then this method is called during the printing + * of the final stack trace, and hence the correct error message + * gets displayed. 
+ */ + public String getMessage() { + if (!specialConstructor) { + return super.getMessage(); + } + StringBuffer expected = new StringBuffer(); + int maxSize = 0; + for (int i = 0; i < expectedTokenSequences.length; i++) { + if (maxSize < expectedTokenSequences[i].length) { + maxSize = expectedTokenSequences[i].length; + } + for (int j = 0; j < expectedTokenSequences[i].length; j++) { + expected.append(tokenImage[expectedTokenSequences[i][j]]).append(" "); //$NON-NLS-1$ + } + if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] != 0) { + expected.append("..."); //$NON-NLS-1$ + } + expected.append(eol).append(" "); //$NON-NLS-1$ + } + String retval = "Encountered \""; //$NON-NLS-1$ + Token tok = currentToken.next; + for (int i = 0; i < maxSize; i++) { + if (i != 0) retval += " "; //$NON-NLS-1$ + if (tok.kind == 0) { + retval += tokenImage[0]; + break; + } + retval += add_escapes(tok.image); + tok = tok.next; + } + retval += "\" at line " + currentToken.next.beginLine + ", column " + currentToken.next.beginColumn; //$NON-NLS-1$ //$NON-NLS-2$ + retval += "." + eol; //$NON-NLS-1$ + if (expectedTokenSequences.length == 1) { + retval += "Was expecting:" + eol + " "; //$NON-NLS-1$ //$NON-NLS-2$ + } else { + retval += "Was expecting one of:" + eol + " "; //$NON-NLS-1$ //$NON-NLS-2$ + } + retval += expected.toString(); + return retval; + } + + /** + * The end of line string for this machine. + */ + protected String eol = System.getProperty("line.separator", "\n"); //$NON-NLS-1$ //$NON-NLS-2$ + + /** + * Used to convert raw characters to their escaped version + * when these raw version cannot be used as part of an ASCII + * string literal. 
+ */ + protected String add_escapes(String str) { + StringBuffer retval = new StringBuffer(); + char ch; + for (int i = 0; i < str.length(); i++) { + switch (str.charAt(i)) + { + case 0 : + continue; + case '\b': + retval.append("\\b"); //$NON-NLS-1$ + continue; + case '\t': + retval.append("\\t"); //$NON-NLS-1$ + continue; + case '\n': + retval.append("\\n"); //$NON-NLS-1$ + continue; + case '\f': + retval.append("\\f"); //$NON-NLS-1$ + continue; + case '\r': + retval.append("\\r"); //$NON-NLS-1$ + continue; + case '\"': + retval.append("\\\""); //$NON-NLS-1$ + continue; + case '\'': + retval.append("\\\'"); //$NON-NLS-1$ + continue; + case '\\': + retval.append("\\\\"); //$NON-NLS-1$ + continue; + default: + if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { + String s = "0000" + Integer.toString(ch, 16); //$NON-NLS-1$ + retval.append("\\u" + s.substring(s.length() - 4, s.length())); //$NON-NLS-1$ + } else { + retval.append(ch); + } + continue; + } + } + return retval.toString(); + } + +} diff --git a/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/ParserThread.java b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/ParserThread.java new file mode 100644 index 000000000..8d7cdb408 --- /dev/null +++ b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/ParserThread.java @@ -0,0 +1,48 @@ +package org.apache.lucene.demo.html; + +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +import java.io.*; + +class ParserThread extends Thread { + HTMLParser parser; + + ParserThread(HTMLParser p) { + parser = p; + } + + public void run() { // convert pipeOut to pipeIn + try { + try { // parse document to pipeOut + parser.HTMLDocument(); + } catch (ParseException e) { + System.out.println("Parse Aborted: " + e.getMessage()); //$NON-NLS-1$ + } catch (TokenMgrError e) { + System.out.println("Parse Aborted: " + e.getMessage()); //$NON-NLS-1$ + } finally { + parser.pipeOut.close(); + synchronized (parser) { + parser.summary.setLength(HTMLParser.SUMMARY_LENGTH); + parser.titleComplete = true; + parser.notifyAll(); + } + } + } catch (IOException e) { + e.printStackTrace(); + } + } +} diff --git a/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/SimpleCharStream.java b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/SimpleCharStream.java new file mode 100644 index 000000000..bdbb382c8 --- /dev/null +++ b/org.eclipse.help.base/src_demo/org/apache/lucene/demo/html/SimpleCharStream.java @@ -0,0 +1,439 @@ +/* Generated By:JavaCC: Do not edit this line. SimpleCharStream.java Version 4.0 */ +package org.apache.lucene.demo.html; + +/** + * An implementation of interface CharStream, where the stream is assumed to + * contain only ASCII characters (without unicode processing). 
public class SimpleCharStream
{
  /** Always false: this stream keeps its state per instance. */
  public static final boolean staticFlag = false;
  /** Current capacity of {@link #buffer}, {@link #bufline} and {@link #bufcolumn}. */
  int bufsize;
  /** Exclusive upper index up to which {@link #FillBuff()} may currently write. */
  int available;
  /** Buffer index of the first character of the current token (-1 while a token is being started). */
  int tokenBegin;
  /** Buffer index of the most recently returned character. */
  public int bufpos = -1;
  /** Line number recorded for each buffered character. */
  protected int bufline[];
  /** Column number recorded for each buffered character. */
  protected int bufcolumn[];

  protected int column = 0;
  protected int line = 1;

  protected boolean prevCharIsCR = false;
  protected boolean prevCharIsLF = false;

  protected java.io.Reader inputStream;

  protected char[] buffer;
  /** Exclusive index of the last character read from the reader into the buffer. */
  protected int maxNextCharInd = 0;
  /** Number of characters that were backed up and must be re-delivered from the buffer. */
  protected int inBuf = 0;
  protected int tabSize = 8;

  /** Sets the tab width used when computing column numbers. */
  protected void setTabSize(int i) { tabSize = i; }

  /**
   * Returns the tab width used when computing column numbers.
   *
   * @param i ignored; kept only so existing callers keep compiling
   * @see #getTabSize()
   */
  protected int getTabSize(int i) { return tabSize; }

  /** Returns the tab width used when computing column numbers. */
  protected int getTabSize() { return tabSize; }


  /**
   * Grows the three parallel buffers by 2048 entries and moves the current
   * token to the start of the new buffers.
   *
   * @param wrapAround true when the token wraps past the end of the old
   *        buffer, so its tail at the front of the old buffer is copied too
   */
  protected void ExpandBuff(boolean wrapAround)
  {
     char[] newbuffer = new char[bufsize + 2048];
     int newbufline[] = new int[bufsize + 2048];
     int newbufcolumn[] = new int[bufsize + 2048];

     try
     {
        if (wrapAround)
        {
           System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
           System.arraycopy(buffer, 0, newbuffer,
                                             bufsize - tokenBegin, bufpos);
           buffer = newbuffer;

           System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
           System.arraycopy(bufline, 0, newbufline, bufsize - tokenBegin, bufpos);
           bufline = newbufline;

           System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
           System.arraycopy(bufcolumn, 0, newbufcolumn, bufsize - tokenBegin, bufpos);
           bufcolumn = newbufcolumn;

           maxNextCharInd = (bufpos += (bufsize - tokenBegin));
        }
        else
        {
           System.arraycopy(buffer, tokenBegin, newbuffer, 0, bufsize - tokenBegin);
           buffer = newbuffer;

           System.arraycopy(bufline, tokenBegin, newbufline, 0, bufsize - tokenBegin);
           bufline = newbufline;

           System.arraycopy(bufcolumn, tokenBegin, newbufcolumn, 0, bufsize - tokenBegin);
           bufcolumn = newbufcolumn;

           maxNextCharInd = (bufpos -= tokenBegin);
        }
     }
     catch (Throwable t)
     {
        // Same Error type and message as before, but keep the cause so the
        // original stack trace is not lost.
        throw new Error(t.getMessage(), t);
     }


     bufsize += 2048;
     available = bufsize;
     tokenBegin = 0;
  }

  /**
   * Reads more characters from the reader into the buffer, first making
   * room by wrapping to the front of the buffer or by expanding it.
   *
   * @throws java.io.IOException when the reader fails or is exhausted; on
   *         exhaustion the reader is closed first. Before rethrowing,
   *         {@link #bufpos} is stepped back by one and {@link #tokenBegin}
   *         is set to it if no token had been started.
   */
  protected void FillBuff() throws java.io.IOException
  {
     if (maxNextCharInd == available)
     {
        if (available == bufsize)
        {
           if (tokenBegin > 2048)
           {
              // Enough free room before the token: wrap to the front.
              bufpos = maxNextCharInd = 0;
              available = tokenBegin;
           }
           else if (tokenBegin < 0)
              bufpos = maxNextCharInd = 0;
           else
              ExpandBuff(false);
        }
        else if (available > tokenBegin)
           available = bufsize;
        else if ((tokenBegin - available) < 2048)
           ExpandBuff(true);
        else
           available = tokenBegin;
     }

     int i;
     try {
        if ((i = inputStream.read(buffer, maxNextCharInd,
                                    available - maxNextCharInd)) == -1)
        {
           inputStream.close();
           throw new java.io.IOException();
        }
        else
           maxNextCharInd += i;
        return;
     }
     catch(java.io.IOException e) {
        --bufpos;
        backup(0);
        if (tokenBegin == -1)
           tokenBegin = bufpos;
        throw e;
     }
  }

  /**
   * Starts a new token: reads the next character and marks its buffer
   * position as the token start.
   *
   * @return the first character of the new token
   * @throws java.io.IOException when no character can be read
   */
  public char BeginToken() throws java.io.IOException
  {
     tokenBegin = -1;
     char c = readChar();
     tokenBegin = bufpos;

     return c;
  }

  /**
   * Advances the line/column counters for character {@code c} and records
   * them at the current buffer position. A line break takes effect on the
   * character after the LF, the CR, or the CR LF pair; a tab moves the
   * column to the next multiple of {@link #tabSize}.
   */
  protected void UpdateLineColumn(char c)
  {
     column++;

     if (prevCharIsLF)
     {
        prevCharIsLF = false;
        line += (column = 1);
     }
     else if (prevCharIsCR)
     {
        prevCharIsCR = false;
        if (c == '\n')
        {
           prevCharIsLF = true;
        }
        else
           line += (column = 1);
     }

     switch (c)
     {
        case '\r' :
           prevCharIsCR = true;
           break;
        case '\n' :
           prevCharIsLF = true;
           break;
        case '\t' :
           column--;
           column += (tabSize - (column % tabSize));
           break;
        default :
           break;
     }

     bufline[bufpos] = line;
     bufcolumn[bufpos] = column;
  }

  /**
   * Returns the next character. Characters that were backed up are served
   * again from the buffer without touching the line/column counters.
   *
   * @throws java.io.IOException when the buffer must be refilled and the
   *         reader fails or is exhausted
   */
  public char readChar() throws java.io.IOException
  {
     if (inBuf > 0)
     {
        --inBuf;

        if (++bufpos == bufsize)
           bufpos = 0;

        return buffer[bufpos];
     }

     if (++bufpos >= maxNextCharInd)
        FillBuff();

     char c = buffer[bufpos];

     UpdateLineColumn(c);
     return (c);
  }

  /**
   * @deprecated
   * @see #getEndColumn
   */

  public int getColumn() {
     return bufcolumn[bufpos];
  }

  /**
   * @deprecated
   * @see #getEndLine
   */

  public int getLine() {
     return bufline[bufpos];
  }

  /** Returns the column recorded for the most recently returned character. */
  public int getEndColumn() {
     return bufcolumn[bufpos];
  }

  /** Returns the line recorded for the most recently returned character. */
  public int getEndLine() {
     return bufline[bufpos];
  }

  /** Returns the column recorded for the first character of the current token. */
  public int getBeginColumn() {
     return bufcolumn[tokenBegin];
  }

  /** Returns the line recorded for the first character of the current token. */
  public int getBeginLine() {
     return bufline[tokenBegin];
  }

  /**
   * Steps back {@code amount} characters so that they are returned again
   * by subsequent {@link #readChar()} calls.
   */
  public void backup(int amount) {

    inBuf += amount;
    if ((bufpos -= amount) < 0)
       bufpos += bufsize;
  }

  /**
   * Creates a stream over a reader.
   *
   * @param dstream     source of characters
   * @param startline   line number assigned to the first character
   * @param startcolumn column number assigned to the first character
   * @param buffersize  initial capacity of the internal buffers
   */
  public SimpleCharStream(java.io.Reader dstream, int startline,
  int startcolumn, int buffersize)
  {
    inputStream = dstream;
    line = startline;
    column = startcolumn - 1;

    available = bufsize = buffersize;
    buffer = new char[buffersize];
    bufline = new int[buffersize];
    bufcolumn = new int[buffersize];
  }

  /** Same as the four-argument constructor with a buffer size of 4096. */
  public SimpleCharStream(java.io.Reader dstream, int startline,
                          int startcolumn)
  {
     this(dstream, startline, startcolumn, 4096);
  }

  /** Same as the four-argument constructor starting at line 1, column 1 with a buffer size of 4096. */
  public SimpleCharStream(java.io.Reader dstream)
  {
     this(dstream, 1, 1, 4096);
  }

  /**
   * Re-targets this stream at a new reader and resets position tracking.
   * The buffers are reallocated only when none exist or the requested
   * size differs from the current buffer length.
   */
  public void ReInit(java.io.Reader dstream, int startline,
  int startcolumn, int buffersize)
  {
    inputStream = dstream;
    line = startline;
    column = startcolumn - 1;

    if (buffer == null || buffersize != buffer.length)
    {
      available = bufsize = buffersize;
      buffer = new char[buffersize];
      bufline = new int[buffersize];
      bufcolumn = new int[buffersize];
    }
    prevCharIsLF = prevCharIsCR = false;
    tokenBegin = inBuf = maxNextCharInd = 0;
    bufpos = -1;
  }

  public void ReInit(java.io.Reader dstream, int startline,
                     int startcolumn)
  {
     ReInit(dstream, startline, startcolumn, 4096);
  }

  public void ReInit(java.io.Reader dstream)
  {
     ReInit(dstream, 1, 1, 4096);
  }

  /**
   * Creates a stream over bytes decoded with {@code encoding}; a null
   * encoding falls back to {@code InputStreamReader}'s default charset.
   */
  public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
  int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
  {
     this(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
  }

  /** Creates a stream over bytes decoded with {@code InputStreamReader}'s default charset. */
  public SimpleCharStream(java.io.InputStream dstream, int startline,
  int startcolumn, int buffersize)
  {
     this(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
  }

  public SimpleCharStream(java.io.InputStream dstream, String encoding, int startline,
                          int startcolumn) throws java.io.UnsupportedEncodingException
  {
     this(dstream, encoding, startline, startcolumn, 4096);
  }

  public SimpleCharStream(java.io.InputStream dstream, int startline,
                          int startcolumn)
  {
     this(dstream, startline, startcolumn, 4096);
  }

  public SimpleCharStream(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
  {
     this(dstream, encoding, 1, 1, 4096);
  }

  public SimpleCharStream(java.io.InputStream dstream)
  {
     this(dstream, 1, 1, 4096);
  }

  public void ReInit(java.io.InputStream dstream, String encoding, int startline,
                          int startcolumn, int buffersize) throws java.io.UnsupportedEncodingException
  {
     ReInit(encoding == null ? new java.io.InputStreamReader(dstream) : new java.io.InputStreamReader(dstream, encoding), startline, startcolumn, buffersize);
  }

  public void ReInit(java.io.InputStream dstream, int startline,
                          int startcolumn, int buffersize)
  {
     ReInit(new java.io.InputStreamReader(dstream), startline, startcolumn, buffersize);
  }

  public void ReInit(java.io.InputStream dstream, String encoding) throws java.io.UnsupportedEncodingException
  {
     ReInit(dstream, encoding, 1, 1, 4096);
  }

  public void ReInit(java.io.InputStream dstream)
  {
     ReInit(dstream, 1, 1, 4096);
  }
  public void ReInit(java.io.InputStream dstream, String encoding, int startline,
                     int startcolumn) throws java.io.UnsupportedEncodingException
  {
     ReInit(dstream, encoding, startline, startcolumn, 4096);
  }
  public void ReInit(java.io.InputStream dstream, int startline,
                     int startcolumn)
  {
     ReInit(dstream, startline, startcolumn, 4096);
  }

  /**
   * Returns the text from the start of the current token up to and
   * including the most recently returned character, joining the two
   * pieces when the token wraps around the end of the buffer.
   */
  public String GetImage()
  {
     if (bufpos >= tokenBegin)
        return new String(buffer, tokenBegin, bufpos - tokenBegin + 1);
     else
        return new String(buffer, tokenBegin, bufsize - tokenBegin) +
                              new String(buffer, 0, bufpos + 1);
  }

  /**
   * Returns the last {@code len} characters ending at the most recently
   * returned character, reading across the buffer wrap when necessary.
   */
  public char[] GetSuffix(int len)
  {
     char[] ret = new char[len];

     if ((bufpos + 1) >= len)
        System.arraycopy(buffer, bufpos - len + 1, ret, 0, len);
     else
     {
        System.arraycopy(buffer, bufsize - (len - bufpos - 1), ret, 0,
                                                          len - bufpos - 1);
        System.arraycopy(buffer, 0, ret, len - bufpos - 1, bufpos + 1);
     }

     return ret;
  }

  /** Releases the internal buffers; the stream needs a ReInit before further use. */
  public void Done()
  {
     buffer = null;
     bufline = null;
     bufcolumn = null;
  }

  /**
   * Method to adjust line and column numbers for the start of a token.
   */
  public void adjustBeginLineColumn(int newLine, int newCol)
  {
     int start = tokenBegin;
     int len;

     if (bufpos >= tokenBegin)
     {
        len = bufpos - tokenBegin + inBuf + 1;
     }
     else
     {
        len = bufsize - tokenBegin + bufpos + 1 + inBuf;
     }

     int i = 0, j = 0, k = 0;
     int nextColDiff = 0, columnDiff = 0;

     // Re-number the characters that share the token's first line,
     // preserving their relative column offsets.
     while (i < len &&
            bufline[j = start % bufsize] == bufline[k = ++start % bufsize])
     {
        bufline[j] = newLine;
        nextColDiff = columnDiff + bufcolumn[k] - bufcolumn[j];
        bufcolumn[j] = newCol + columnDiff;
        columnDiff = nextColDiff;
        i++;
     }

     if (i < len)
     {
        bufline[j] = newLine++;
        bufcolumn[j] = newCol + columnDiff;

        // Remaining characters only get new line numbers.
        while (i++ < len)
        {
           if (bufline[j = start % bufsize] != bufline[++start % bufsize])
              bufline[j] = newLine++;
           else
              bufline[j] = newLine;
        }
     }

     line = bufline[j];
     column = bufcolumn[j];
  }

}
public final class Tags {

  /**
   * contains all tags for which whitespaces have to be inserted for proper tokenization
   */
  public static final java.util.Set WS_ELEMS =
      java.util.Collections.synchronizedSet(new java.util.HashSet());

  static {
    // Each entry is a tag prefix: the opening "<" or "</" plus the element
    // name, without the closing ">".
    final String[] prefixes = {
      "<hr", //$NON-NLS-1$
      "<hr/", // note that "<hr />" does not need to be listed explicitly //$NON-NLS-1$
      "<br", //$NON-NLS-1$
      "<br/", //$NON-NLS-1$
      "<p", //$NON-NLS-1$
      "</p", //$NON-NLS-1$
      "<div", //$NON-NLS-1$
      "</div", //$NON-NLS-1$
      "<td", //$NON-NLS-1$
      "</td", //$NON-NLS-1$
      "<li", //$NON-NLS-1$
      "</li", //$NON-NLS-1$
      "<q", //$NON-NLS-1$
      "</q", //$NON-NLS-1$
      "<blockquote", //$NON-NLS-1$
      "</blockquote", //$NON-NLS-1$
      "<dt", //$NON-NLS-1$
      "</dt", //$NON-NLS-1$
      "<h1", //$NON-NLS-1$
      "</h1", //$NON-NLS-1$
      "<h2", //$NON-NLS-1$
      "</h2", //$NON-NLS-1$
      "<h3", //$NON-NLS-1$
      "</h3", //$NON-NLS-1$
      "<h4", //$NON-NLS-1$
      "</h4", //$NON-NLS-1$
      "<h5", //$NON-NLS-1$
      "</h5", //$NON-NLS-1$
      "<h6", //$NON-NLS-1$
      "</h6", //$NON-NLS-1$
    };
    for (int i = 0; i < prefixes.length; i++) {
      WS_ELEMS.add(prefixes[i]);
    }
  }
}
/* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */

/**
 * One element of the input token stream.
 */
public class Token {

  /**
   * Integer code identifying what kind of token this is. The numbering is
   * determined by JavaCCParser; the table of codes is stored in the
   * generated ...Constants.java file.
   */
  public int kind;

  /** Line of the first character of this token. */
  public int beginLine;

  /** Column of the first character of this token. */
  public int beginColumn;

  /** Line of the last character of this token. */
  public int endLine;

  /** Column of the last character of this token. */
  public int endColumn;

  /**
   * The string image of the token.
   */
  public String image;

  /**
   * For a regular (non-special) token: the next regular token from the input
   * stream, or null if this is the last token or the token manager has not
   * read past this one. For a special token: the special token that
   * immediately follows it without an intervening regular token, or null if
   * there is none.
   */
  public Token next;

  /**
   * Gives access to the special tokens that occur before this token but
   * after the immediately preceding regular (non-special) token. Null when
   * there are none. When there are several, this refers to the last of them,
   * which refers to the one before it through its own specialToken field,
   * and so on back to the first special token (whose specialToken field is
   * null).
   */
  public Token specialToken;

  /**
   * Returns the token's image.
   */
  public String toString()
  {
    return this.image;
  }

  /**
   * Factory for token objects. Every kind currently yields a plain Token.
   * To hand out subclass instances for particular kinds, dispatch on ofKind
   * here; for example, for a subclass IDToken that should be created when
   * ofKind is ID:
   *
   *    case MyParserConstants.ID : return new IDToken();
   *
   * The matchedToken variable can then be cast to the appropriate type and
   * used in lexical actions.
   */
  public static final Token newToken(int ofKind)
  {
    // No kind-specific subclasses are registered, so ofKind is not consulted.
    return new Token();
  }

}
/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 3.0 */

/**
 * Error thrown by the generated token manager, carrying a reason code in
 * addition to the message.
 */
public class TokenMgrError extends Error
{

  private static final long serialVersionUID = 1L;

  /*
   * Ordinals for the various reasons why an Error of this type can be thrown.
   */

  /**
   * A lexical error occurred.
   */
  static final int LEXICAL_ERROR = 0;

  /**
   * An attempt was made to create a second instance of a static token manager.
   */
  static final int STATIC_LEXER_ERROR = 1;

  /**
   * Tried to change to an invalid lexical state.
   */
  static final int INVALID_LEXICAL_STATE = 2;

  /**
   * Detected (and bailed out of) an infinite loop in the token manager.
   */
  static final int LOOP_DETECTED = 3;

  /**
   * The reason why the error was thrown; intended to be one of the four
   * constants above.
   */
  int errorCode;

  /**
   * Returns a copy of the given string in which unprintable characters are
   * replaced by their escaped (or unicode escaped) equivalents. NUL
   * characters are dropped; quotes and backslashes are backslash-escaped;
   * any other character outside 0x20..0x7e becomes a four-digit unicode
   * escape.
   */
  protected static final String addEscapes(String str) {
    StringBuffer escaped = new StringBuffer();
    final int length = str.length();
    for (int pos = 0; pos < length; pos++) {
      final char c = str.charAt(pos);
      switch (c)
      {
        case 0 :
          // NUL is silently skipped
          break;
        case '\b':
          escaped.append("\\b"); //$NON-NLS-1$
          break;
        case '\t':
          escaped.append("\\t"); //$NON-NLS-1$
          break;
        case '\n':
          escaped.append("\\n"); //$NON-NLS-1$
          break;
        case '\f':
          escaped.append("\\f"); //$NON-NLS-1$
          break;
        case '\r':
          escaped.append("\\r"); //$NON-NLS-1$
          break;
        case '\"':
          escaped.append("\\\""); //$NON-NLS-1$
          break;
        case '\'':
          escaped.append("\\\'"); //$NON-NLS-1$
          break;
        case '\\':
          escaped.append("\\\\"); //$NON-NLS-1$
          break;
        default:
          if (c < 0x20 || c > 0x7e) {
            // Left-pad the hex value with zeros and keep the last four digits
            String hex = "0000" + Integer.toString(c, 16); //$NON-NLS-1$
            escaped.append("\\u" + hex.substring(hex.length() - 4, hex.length())); //$NON-NLS-1$
          } else {
            escaped.append(c);
          }
          break;
      }
    }
    return escaped.toString();
  }

  /**
   * Builds the detailed message used when the token manager reports a
   * lexical error.
   * Parameters :
   *    EOFSeen     : indicates if EOF caused the lexical error
   *    lexState    : lexical state in which this error occurred (not used in the message)
   *    errorLine   : line number when the error occurred
   *    errorColumn : column number when the error occurred
   *    errorAfter  : prefix that was seen before this error occurred
   *    curChar     : the offending character
   * Note: You can customize the lexical error message by modifying this method.
   */
  protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) {
    // Either the EOF marker, or the escaped offending character with its code.
    final String encountered;
    if (EOFSeen) {
      encountered = "<EOF> "; //$NON-NLS-1$
    } else {
      encountered = "\"" + addEscapes(String.valueOf(curChar)) + "\"" //$NON-NLS-1$ //$NON-NLS-2$
          + " (" + (int) curChar + "), "; //$NON-NLS-1$ //$NON-NLS-2$
    }

    return "Lexical error at line " + errorLine //$NON-NLS-1$
        + ", column " + errorColumn //$NON-NLS-1$
        + ". Encountered: " + encountered //$NON-NLS-1$
        + "after : \"" + addEscapes(errorAfter) + "\""; //$NON-NLS-1$ //$NON-NLS-2$
  }

  /**
   * Returns the message of this error, delegating to the superclass.
   * The body of this method can be modified to customize error messages.
   * For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not
   * of end-users' concern, so something like
   *
   *     "Internal Error : Please file a bug report .... "
   *
   * could be returned for such cases in the release version of a parser.
   */
  public String getMessage() {
    return super.getMessage();
  }

  /*
   * Constructors of various flavors follow.
   */

  /** Creates an error with no message; errorCode keeps its default value. */
  public TokenMgrError() {
  }

  /** Creates an error with the given message and reason code. */
  public TokenMgrError(String message, int reason) {
    super(message);
    this.errorCode = reason;
  }

  /** Creates an error whose message is produced by LexicalError from the given details. */
  public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) {
    this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason);
  }
}