Author: ehatcher
Date: Mon Mar 7 08:08:02 2005
New Revision: 156431
URL: http://svn.apache.org/viewcvs?view=rev&rev=156431
Log:
Add new PrecedenceQueryParser so others can try it out and discuss any issues
with it. TestPrecedenceQueryParser is a clone of TestQueryParser with two noted
changes - one within testQPA and one in the testPrecedence method.
testPrecedence was also added to TestQueryParser to show its awkward behavior
(and ensure it doesn't change).
Modified build to use Ant's <javacc> task, which has built-in dependency
checking, instead of launching JavaCC directly with <java>.
Added:
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/CharStream.java
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/FastCharStream.java
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/ParseException.java
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserConstants.java
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/Token.java
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java
lucene/java/trunk/src/test/org/apache/lucene/queryParser/precedence/
lucene/java/trunk/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
Modified:
lucene/java/trunk/build.xml
lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java
Modified: lucene/java/trunk/build.xml
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/build.xml?view=diff&r1=156430&r2=156431
==============================================================================
--- lucene/java/trunk/build.xml (original)
+++ lucene/java/trunk/build.xml Mon Mar 7 08:08:02 2005
@@ -32,9 +32,6 @@
<property name="demo.war.name" value="luceneweb"/>
<property name="javacc.home" location="."/>
- <property name="javacc.jar.dir" location="${javacc.home}/bin/lib"/>
- <property name="javacc.jar" location="${javacc.jar.dir}/javacc.jar"/>
- <property name="javacc.main.class" value="org.javacc.parser.Main"/>
<property name="jakarta.site2.home" location="../jakarta-site2"/>
@@ -101,8 +98,8 @@
<available
property="javacc.present"
- classname="${javacc.main.class}"
- classpath="${javacc.jar}"
+ classname="org.javacc.parser.Main"
+ classpath="${javacc.home}/bin/lib/javacc.jar"
/>
<available
@@ -579,15 +576,50 @@
<!-- ================================================================== -->
<!-- Build the JavaCC files into the source tree -->
<!-- ================================================================== -->
- <target name="javacc"
depends="javacc-StandardAnalyzer,javacc-QueryParser,javacc-HTMLParser"/>
+ <target name="jjdoc">
+ <mkdir dir="${build.dir}/docs/grammars"/>
+ <jjdoc target="src/java/org/apache/lucene/queryParser/QueryParser.jj"
+ outputfile="${build.dir}/docs/grammars/QueryParser.html"
+ javacchome="${javacc.home}"
+ />
+ <jjdoc
target="src/java/org/apache/lucene/queryParser/PrecedenceQueryParser.jj"
+ outputfile="${build.dir}/docs/grammars/PrecedenceQueryParser.html"
+ javacchome="${javacc.home}"
+ />
+ <jjdoc
target="src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj"
+ outputfile="${build.dir}/docs/grammars/StandardTokenizer.html"
+ javacchome="${javacc.home}"
+ />
+ <jjdoc target="src/demo/org/apache/lucene/demo/html/HTMLParser.jj"
+ outputfile="${build.dir}/docs/grammars/HTMLParser.html"
+ javacchome="${javacc.home}"
+ />
+ </target>
+
+ <target name="javacc"
depends="javacc-StandardAnalyzer,javacc-QueryParser,javacc-PrecedenceQueryParser,javacc-HTMLParser"/>
+
+ <macrodef name="invoke-javacc">
+ <attribute name="target"/>
+ <attribute name="outputDir"/>
+ <sequential>
+ <javacc
+ target="@{target}"
+ outputDirectory="@{outputDir}"
+ debugTokenManager="${javacc.debug.tokenmgr}"
+ debugParser="${javacc.debug.parser}"
+ debuglookahead="${javacc.debug.lookahead}"
+ javacchome="${javacc.home}"
+ />
+ </sequential>
+ </macrodef>
<target name="javacc-StandardAnalyzer" depends="init,javacc-check"
if="javacc.present">
<!-- generate this in a build directory so we can exclude ParseException
-->
<mkdir dir="${build.dir}/gen/org/apache/lucene/analysis/standard"/>
- <antcall target="invoke-javacc">
- <param name="target"
location="src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj"/>
- <param name="output.dir"
location="${build.dir}/gen/org/apache/lucene/analysis/standard"/>
- </antcall>
+
+ <invoke-javacc
target="src/java/org/apache/lucene/analysis/standard/StandardTokenizer.jj"
+
outputDir="${build.dir}/gen/org/apache/lucene/analysis/standard"
+ />
<copy todir="src/java/org/apache/lucene/analysis/standard">
<fileset dir="${build.dir}/gen/org/apache/lucene/analysis/standard">
<include name="*.java"/>
@@ -597,28 +629,21 @@
</target>
<target name="javacc-QueryParser" depends="init,javacc-check"
if="javacc.present">
- <antcall target="invoke-javacc">
- <param name="target"
location="src/java/org/apache/lucene/queryParser/QueryParser.jj"/>
- <param name="output.dir"
location="src/java/org/apache/lucene/queryParser"/>
- </antcall>
+ <invoke-javacc
target="src/java/org/apache/lucene/queryParser/QueryParser.jj"
+ outputDir="src/java/org/apache/lucene/queryParser"
+ />
</target>
-
- <target name="javacc-HTMLParser" depends="init,javacc-check"
if="javacc.present">
- <antcall target="invoke-javacc">
- <param name="target"
location="src/demo/org/apache/lucene/demo/html/HTMLParser.jj"/>
- <param name="output.dir"
location="src/demo/org/apache/lucene/demo/html"/>
- </antcall>
+
+ <target name="javacc-PrecedenceQueryParser" depends="init,javacc-check"
if="javacc.present">
+ <invoke-javacc
target="src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj"
+
outputDir="src/java/org/apache/lucene/queryParser/precedence"
+ />
</target>
- <target name="invoke-javacc">
- <java classname="${javacc.main.class}" fork="true">
- <classpath path="${javacc.jar}"/>
-
- <sysproperty key="install.root" file="${javacc.home}"/>
-
- <arg value="-OUTPUT_DIRECTORY:${output.dir}"/>
- <arg value="${target}"/>
- </java>
+ <target name="javacc-HTMLParser" depends="init,javacc-check"
if="javacc.present">
+ <invoke-javacc target="src/demo/org/apache/lucene/demo/html/HTMLParser.jj"
+ outputDir="src/demo/org/apache/lucene/demo/html"
+ />
</target>
<target name="default" depends="jar-core"/>
Added:
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/CharStream.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/CharStream.java?view=auto&rev=156431
==============================================================================
---
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/CharStream.java
(added)
+++
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/CharStream.java
Mon Mar 7 08:08:02 2005
@@ -0,0 +1,110 @@
+/* Generated By:JavaCC: Do not edit this line. CharStream.java Version 3.0 */
+package org.apache.lucene.queryParser.precedence;
+
+/**
+ * This interface describes a character stream that maintains line and
+ * column number positions of the characters. It also has the capability
+ * to backup the stream to some extent. An implementation of this
+ * interface is used in the TokenManager implementation generated by
+ * JavaCCParser.
+ *
+ * All the methods except backup can be implemented in any fashion. backup
+ * needs to be implemented correctly for the correct operation of the lexer.
+ * Rest of the methods are all used to get information like line number,
+ * column number and the String that constitutes a token and are not used
+ * by the lexer. Hence their implementation won't affect the generated lexer's
+ * operation.
+ */
+
+public interface CharStream {
+
+ /**
+ * Returns the next character from the selected input. The method
+ * of selecting the input is the responsibility of the class
+ * implementing this interface. Can throw any java.io.IOException.
+ */
+ char readChar() throws java.io.IOException;
+
+ /**
+ * Returns the column position of the character last read.
+ * @deprecated
+ * @see #getEndColumn
+ */
+ int getColumn();
+
+ /**
+ * Returns the line number of the character last read.
+ * @deprecated
+ * @see #getEndLine
+ */
+ int getLine();
+
+ /**
+ * Returns the column number of the last character for current token (being
+ * matched after the last call to BeginTOken).
+ */
+ int getEndColumn();
+
+ /**
+ * Returns the line number of the last character for current token (being
+ * matched after the last call to BeginTOken).
+ */
+ int getEndLine();
+
+ /**
+ * Returns the column number of the first character for current token (being
+ * matched after the last call to BeginTOken).
+ */
+ int getBeginColumn();
+
+ /**
+ * Returns the line number of the first character for current token (being
+ * matched after the last call to BeginTOken).
+ */
+ int getBeginLine();
+
+ /**
+ * Backs up the input stream by amount steps. Lexer calls this method if it
+ * had already read some characters, but could not use them to match a
+ * (longer) token. So, they will be used again as the prefix of the next
+ * token and it is the implemetation's responsibility to do this right.
+ */
+ void backup(int amount);
+
+ /**
+ * Returns the next character that marks the beginning of the next token.
+ * All characters must remain in the buffer between two successive calls
+ * to this method to implement backup correctly.
+ */
+ char BeginToken() throws java.io.IOException;
+
+ /**
+ * Returns a string made up of characters from the marked token beginning
+ * to the current buffer position. Implementations have the choice of returning
+ * anything that they want to. For example, for efficiency, one might decide
+ * to just return null, which is a valid implementation.
+ */
+ String GetImage();
+
+ /**
+ * Returns an array of characters that make up the suffix of length 'len' for
+ * the currently matched token. This is used to build up the matched string
+ * for use in actions in the case of MORE. A simple and inefficient
+ * implementation of this is as follows :
+ *
+ * {
+ * String t = GetImage();
+ * return t.substring(t.length() - len, t.length()).toCharArray();
+ * }
+ */
+ char[] GetSuffix(int len);
+
+ /**
+ * The lexer calls this function to indicate that it is done with the stream
+ * and hence implementations can free any resources held by this class.
+ * Again, the body of this function can be just empty and it will not
+ * affect the lexer's operation.
+ */
+ void Done();
+
+}
Added:
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/FastCharStream.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/FastCharStream.java?view=auto&rev=156431
==============================================================================
---
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/FastCharStream.java
(added)
+++
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/FastCharStream.java
Mon Mar 7 08:08:02 2005
@@ -0,0 +1,123 @@
+// FastCharStream.java
+package org.apache.lucene.queryParser.precedence;
+
+/**
+ * Copyright 2004 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import org.apache.lucene.queryParser.*;
+
+import java.io.*;
+
+/** An efficient implementation of JavaCC's CharStream interface. <p>Note that
+ * this does not do line-number counting, but instead keeps track of the
+ * character position of the token in the input, as required by Lucene's {@link
+ * org.apache.lucene.analysis.Token} API. */
+public final class FastCharStream implements CharStream {
+ char[] buffer = null;
+
+ int bufferLength = 0; // end of valid chars
+ int bufferPosition = 0; // next char to read
+
+ int tokenStart = 0; // offset in buffer
+ int bufferStart = 0; // position in file of buffer
+
+ Reader input; // source of chars
+
+ /** Constructs from a Reader. */
+ public FastCharStream(Reader r) {
+ input = r;
+ }
+
+ public final char readChar() throws IOException {
+ if (bufferPosition >= bufferLength)
+ refill();
+ return buffer[bufferPosition++];
+ }
+
+ private final void refill() throws IOException {
+ int newPosition = bufferLength - tokenStart;
+
+ if (tokenStart == 0) { // token won't fit in buffer
+ if (buffer == null) { // first time: alloc buffer
+ buffer = new char[2048];
+ } else if (bufferLength == buffer.length) { // grow buffer
+ char[] newBuffer = new char[buffer.length*2];
+ System.arraycopy(buffer, 0, newBuffer, 0, bufferLength);
+ buffer = newBuffer;
+ }
+ } else { // shift token to front
+ System.arraycopy(buffer, tokenStart, buffer, 0, newPosition);
+ }
+
+ bufferLength = newPosition; // update state
+ bufferPosition = newPosition;
+ bufferStart += tokenStart;
+ tokenStart = 0;
+
+ int charsRead = // fill space in buffer
+ input.read(buffer, newPosition, buffer.length-newPosition);
+ if (charsRead == -1)
+ throw new IOException("read past eof");
+ else
+ bufferLength += charsRead;
+ }
+
+ public final char BeginToken() throws IOException {
+ tokenStart = bufferPosition;
+ return readChar();
+ }
+
+ public final void backup(int amount) {
+ bufferPosition -= amount;
+ }
+
+ public final String GetImage() {
+ return new String(buffer, tokenStart, bufferPosition - tokenStart);
+ }
+
+ public final char[] GetSuffix(int len) {
+ char[] value = new char[len];
+ System.arraycopy(buffer, bufferPosition - len, value, 0, len);
+ return value;
+ }
+
+ public final void Done() {
+ try {
+ input.close();
+ } catch (IOException e) {
+ System.err.println("Caught: " + e + "; ignoring.");
+ }
+ }
+
+ public final int getColumn() {
+ return bufferStart + bufferPosition;
+ }
+ public final int getLine() {
+ return 1;
+ }
+ public final int getEndColumn() {
+ return bufferStart + bufferPosition;
+ }
+ public final int getEndLine() {
+ return 1;
+ }
+ public final int getBeginColumn() {
+ return bufferStart + tokenStart;
+ }
+ public final int getBeginLine() {
+ return 1;
+ }
+}
Added:
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/ParseException.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/ParseException.java?view=auto&rev=156431
==============================================================================
---
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/ParseException.java
(added)
+++
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/ParseException.java
Mon Mar 7 08:08:02 2005
@@ -0,0 +1,192 @@
+/* Generated By:JavaCC: Do not edit this line. ParseException.java Version 3.0 */
+package org.apache.lucene.queryParser.precedence;
+
+/**
+ * This exception is thrown when parse errors are encountered.
+ * You can explicitly create objects of this exception type by
+ * calling the method generateParseException in the generated
+ * parser.
+ *
+ * You can modify this class to customize your error reporting
+ * mechanisms so long as you retain the public fields.
+ */
+public class ParseException extends Exception {
+
+ /**
+ * This constructor is used by the method "generateParseException"
+ * in the generated parser. Calling this constructor generates
+ * a new object of this type with the fields "currentToken",
+ * "expectedTokenSequences", and "tokenImage" set. The boolean
+ * flag "specialConstructor" is also set to true to indicate that
+ * this constructor was used to create this object.
+ * This constructor calls its super class with the empty string
+ * to force the "toString" method of parent class "Throwable" to
+ * print the error message in the form:
+ * ParseException: <result of getMessage>
+ */
+ public ParseException(Token currentTokenVal,
+ int[][] expectedTokenSequencesVal,
+ String[] tokenImageVal
+ )
+ {
+ super("");
+ specialConstructor = true;
+ currentToken = currentTokenVal;
+ expectedTokenSequences = expectedTokenSequencesVal;
+ tokenImage = tokenImageVal;
+ }
+
+ /**
+ * The following constructors are for use by you for whatever
+ * purpose you can think of. Constructing the exception in this
+ * manner makes the exception behave in the normal way - i.e., as
+ * documented in the class "Throwable". The fields "errorToken",
+ * "expectedTokenSequences", and "tokenImage" do not contain
+ * relevant information. The JavaCC generated code does not use
+ * these constructors.
+ */
+
+ public ParseException() {
+ super();
+ specialConstructor = false;
+ }
+
+ public ParseException(String message) {
+ super(message);
+ specialConstructor = false;
+ }
+
+ /**
+ * This variable determines which constructor was used to create
+ * this object and thereby affects the semantics of the
+ * "getMessage" method (see below).
+ */
+ protected boolean specialConstructor;
+
+ /**
+ * This is the last token that has been consumed successfully. If
+ * this object has been created due to a parse error, the token
+ * followng this token will (therefore) be the first error token.
+ */
+ public Token currentToken;
+
+ /**
+ * Each entry in this array is an array of integers. Each array
+ * of integers represents a sequence of tokens (by their ordinal
+ * values) that is expected at this point of the parse.
+ */
+ public int[][] expectedTokenSequences;
+
+ /**
+ * This is a reference to the "tokenImage" array of the generated
+ * parser within which the parse error occurred. This array is
+ * defined in the generated ...Constants interface.
+ */
+ public String[] tokenImage;
+
+ /**
+ * This method has the standard behavior when this object has been
+ * created using the standard constructors. Otherwise, it uses
+ * "currentToken" and "expectedTokenSequences" to generate a parse
+ * error message and returns it. If this object has been created
+ * due to a parse error, and you do not catch it (it gets thrown
+ * from the parser), then this method is called during the printing
+ * of the final stack trace, and hence the correct error message
+ * gets displayed.
+ */
+ public String getMessage() {
+ if (!specialConstructor) {
+ return super.getMessage();
+ }
+ String expected = "";
+ int maxSize = 0;
+ for (int i = 0; i < expectedTokenSequences.length; i++) {
+ if (maxSize < expectedTokenSequences[i].length) {
+ maxSize = expectedTokenSequences[i].length;
+ }
+ for (int j = 0; j < expectedTokenSequences[i].length; j++) {
+ expected += tokenImage[expectedTokenSequences[i][j]] + " ";
+ }
+ if (expectedTokenSequences[i][expectedTokenSequences[i].length - 1] !=
0) {
+ expected += "...";
+ }
+ expected += eol + " ";
+ }
+ String retval = "Encountered \"";
+ Token tok = currentToken.next;
+ for (int i = 0; i < maxSize; i++) {
+ if (i != 0) retval += " ";
+ if (tok.kind == 0) {
+ retval += tokenImage[0];
+ break;
+ }
+ retval += add_escapes(tok.image);
+ tok = tok.next;
+ }
+ retval += "\" at line " + currentToken.next.beginLine + ", column " +
currentToken.next.beginColumn;
+ retval += "." + eol;
+ if (expectedTokenSequences.length == 1) {
+ retval += "Was expecting:" + eol + " ";
+ } else {
+ retval += "Was expecting one of:" + eol + " ";
+ }
+ retval += expected;
+ return retval;
+ }
+
+ /**
+ * The end of line string for this machine.
+ */
+ protected String eol = System.getProperty("line.separator", "\n");
+
+ /**
+ * Used to convert raw characters to their escaped version
+ * when these raw version cannot be used as part of an ASCII
+ * string literal.
+ */
+ protected String add_escapes(String str) {
+ StringBuffer retval = new StringBuffer();
+ char ch;
+ for (int i = 0; i < str.length(); i++) {
+ switch (str.charAt(i))
+ {
+ case 0 :
+ continue;
+ case '\b':
+ retval.append("\\b");
+ continue;
+ case '\t':
+ retval.append("\\t");
+ continue;
+ case '\n':
+ retval.append("\\n");
+ continue;
+ case '\f':
+ retval.append("\\f");
+ continue;
+ case '\r':
+ retval.append("\\r");
+ continue;
+ case '\"':
+ retval.append("\\\"");
+ continue;
+ case '\'':
+ retval.append("\\\'");
+ continue;
+ case '\\':
+ retval.append("\\\\");
+ continue;
+ default:
+ if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) {
+ String s = "0000" + Integer.toString(ch, 16);
+ retval.append("\\u" + s.substring(s.length() - 4,
s.length()));
+ } else {
+ retval.append(ch);
+ }
+ continue;
+ }
+ }
+ return retval.toString();
+ }
+
+}
Added:
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java?view=auto&rev=156431
==============================================================================
---
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java
(added)
+++
lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.java
Mon Mar 7 08:08:02 2005
@@ -0,0 +1,1283 @@
+/* Generated By:JavaCC: Do not edit this line. PrecedenceQueryParser.java */
+package org.apache.lucene.queryParser.precedence;
+
+import java.util.Vector;
+import java.io.*;
+import java.text.*;
+import java.util.*;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.analysis.*;
+import org.apache.lucene.document.*;
+import org.apache.lucene.search.*;
+import org.apache.lucene.util.Parameter;
+
+/**
+ * This class is generated by JavaCC. The only method that clients should need
+ * to call is {@link #parse(String)} or {@link #parse(String, String, Analyzer)}.
+ *
+ * The syntax for query strings is as follows:
+ * A Query is a series of clauses.
+ * A clause may be prefixed by:
+ * <ul>
+ * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
+ * that the clause is required or prohibited respectively; or
+ * <li> a term followed by a colon, indicating the field to be searched.
+ * This enables one to construct queries which search multiple fields.
+ * </ul>
+ *
+ * A clause may be either:
+ * <ul>
+ * <li> a term, indicating all the documents that contain this term; or
+ * <li> a nested query, enclosed in parentheses. Note that this may be used
+ * with a <code>+</code>/<code>-</code> prefix to require any of a set of
+ * terms.
+ * </ul>
+ *
+ * Thus, in BNF, the query grammar is:
+ * <pre>
+ * Query ::= ( Clause )*
+ * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
+ * </pre>
+ *
+ * <p>
+ * Examples of appropriately formatted queries can be found in the <a
+ * href="http://jakarta.apache.org/lucene/docs/queryparsersyntax.html">query syntax
+ * documentation</a>.
+ * </p>
+ *
+ * @author Brian Goetz
+ * @author Peter Halacsy
+ * @author Tatu Saloranta
+ */
+
+public class PrecedenceQueryParser implements PrecedenceQueryParserConstants {
+
+ private static final int CONJ_NONE = 0;
+ private static final int CONJ_AND = 1;
+ private static final int CONJ_OR = 2;
+
+ private static final int MOD_NONE = 0;
+ private static final int MOD_NOT = 10;
+ private static final int MOD_REQ = 11;
+
+ // make it possible to call setDefaultOperator() without accessing
+ // the nested class:
+ public static final Operator AND_OPERATOR = Operator.AND;
+ public static final Operator OR_OPERATOR = Operator.OR;
+
+ /** The actual operator that parser uses to combine query terms */
+ private Operator operator = OR_OPERATOR;
+
+ boolean lowercaseExpandedTerms = true;
+
+ Analyzer analyzer;
+ String field;
+ int phraseSlop = 0;
+ float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
+ int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
+ Locale locale = Locale.getDefault();
+
+ static final class Operator extends Parameter {
+ private Operator(String name) {
+ super(name);
+ }
+ static final Operator OR = new Operator("OR");
+ static final Operator AND = new Operator("AND");
+ }
+
+ /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
+ * @param query the query string to be parsed.
+ * @param field the default field for query terms.
+ * @param analyzer used to find terms in the query text.
+ * @throws ParseException if the parsing fails
+ */
+ static public Query parse(String query, String field, Analyzer analyzer)
+ throws ParseException {
+ PrecedenceQueryParser parser = new PrecedenceQueryParser(field, analyzer);
+ return parser.parse(query);
+ }
+
+ /** Constructs a query parser.
+ * @param f the default field for query terms.
+ * @param a used to find terms in the query text.
+ */
+ public PrecedenceQueryParser(String f, Analyzer a) {
+ this(new FastCharStream(new StringReader("")));
+ analyzer = a;
+ field = f;
+ }
+
+ /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
+ * @param query the query string to be parsed.
+ * @throws ParseException if the parsing fails
+ */
+ public Query parse(String expression) throws ParseException {
+ ReInit(new FastCharStream(new StringReader(expression)));
+ try {
+ Query query = Query(field);
+ return (query != null) ? query : new BooleanQuery();
+ }
+ catch (TokenMgrError tme) {
+ throw new ParseException(tme.getMessage());
+ }
+ catch (BooleanQuery.TooManyClauses tmc) {
+ throw new ParseException("Too many boolean clauses");
+ }
+ }
+
+ /**
+ * @return Returns the analyzer.
+ */
+ public Analyzer getAnalyzer() {
+ return analyzer;
+ }
+
+ /**
+ * @return Returns the field.
+ */
+ public String getField() {
+ return field;
+ }
+
+ /**
+ * Get the minimal similarity for fuzzy queries.
+ */
+ public float getFuzzyMinSim() {
+ return fuzzyMinSim;
+ }
+
+ /**
+ * Set the minimum similarity for fuzzy queries.
+ * Default is 0.5f.
+ */
+ public void setFuzzyMinSim(float fuzzyMinSim) {
+ this.fuzzyMinSim = fuzzyMinSim;
+ }
+
+ /**
+ * Get the prefix length for fuzzy queries.
+ * @return Returns the fuzzyPrefixLength.
+ */
+ public int getFuzzyPrefixLength() {
+ return fuzzyPrefixLength;
+ }
+
+ /**
+ * Set the prefix length for fuzzy queries. Default is 0.
+ * @param fuzzyPrefixLength The fuzzyPrefixLength to set.
+ */
+ public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
+ this.fuzzyPrefixLength = fuzzyPrefixLength;
+ }
+
+ /**
+ * Sets the default slop for phrases. If zero, then exact phrase matches
+ * are required. Default value is zero.
+ */
+ public void setPhraseSlop(int phraseSlop) {
+ this.phraseSlop = phraseSlop;
+ }
+
+ /**
+ * Gets the default slop for phrases.
+ */
+ public int getPhraseSlop() {
+ return phraseSlop;
+ }
+
+ /**
+ * Sets the boolean operator of the QueryParser.
+ * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
+ * are considered optional: for example <code>capital of Hungary</code> is equal to
+ * <code>capital OR of OR Hungary</code>.<br/>
+ * In <code>AND_OPERATOR</code> mode terms are considered to be in conjuction: the
+ * above mentioned query is parsed as <code>capital AND of AND Hungary</code>
+ */
+ public void setDefaultOperator(Operator op) {
+ this.operator = op;
+ }
+
+ /**
+ * Gets implicit operator setting, which will be either AND_OPERATOR
+ * or OR_OPERATOR.
+ */
+ public Operator getDefaultOperator() {
+ return operator;
+ }
+
+ /**
+ * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
+ * lower-cased or not. Default is <code>true</code>.
+ */
+ public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
+ this.lowercaseExpandedTerms = lowercaseExpandedTerms;
+ }
+
+ /**
+ * @see #setLowercaseExpandedTerms(boolean)
+ */
+ public boolean getLowercaseExpandedTerms() {
+ return lowercaseExpandedTerms;
+ }
+
+ /**
+ * Set locale used by date range parsing.
+ */
+ public void setLocale(Locale locale) {
+ this.locale = locale;
+ }
+
+ /**
+ * Returns current locale, allowing access by subclasses.
+ */
+ public Locale getLocale() {
+ return locale;
+ }
+
+ protected void addClause(Vector clauses, int conj, int modifier, Query q) {
+ boolean required, prohibited;
+
+ // If this term is introduced by AND, make the preceding term required,
+ // unless it's already prohibited
+ if (clauses.size() > 0 && conj == CONJ_AND) {
+ BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
+ if (!c.isProhibited())
+ c.setOccur(BooleanClause.Occur.MUST);
+ }
+
+ if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) {
+ // If this term is introduced by OR, make the preceding term optional,
+ // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
+ // notice if the input is a OR b, first term is parsed as required; without
+ // this modification a OR b would parsed as +a OR b
+ BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
+ if (!c.isProhibited())
+ c.setOccur(BooleanClause.Occur.SHOULD);
+ }
+
+ // We might have been passed a null query; the term might have been
+ // filtered away by the analyzer.
+ if (q == null)
+ return;
+
+ if (operator == OR_OPERATOR) {
+ // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
+ // introduced by NOT or -; make sure not to set both.
+ prohibited = (modifier == MOD_NOT);
+ required = (modifier == MOD_REQ);
+ if (conj == CONJ_AND && !prohibited) {
+ required = true;
+ }
+ } else {
+ // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED
+ // if not PROHIBITED and not introduced by OR
+ prohibited = (modifier == MOD_NOT);
+ required = (!prohibited && conj != CONJ_OR);
+ }
+ if (required && !prohibited)
+ clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST));
+ else if (!required && !prohibited)
+ clauses.addElement(new BooleanClause(q, BooleanClause.Occur.SHOULD));
+ else if (!required && prohibited)
+ clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST_NOT));
+ else
+ throw new RuntimeException("Clause cannot be both required and
prohibited");
+ }
+
+ /**
+ * @exception ParseException throw in overridden method to disallow
+ */
+ protected Query getFieldQuery(String field, String queryText) throws
ParseException {
+ // Use the analyzer to get all the tokens, and then build a TermQuery,
+ // PhraseQuery, or nothing based on the term count
+
+ TokenStream source = analyzer.tokenStream(field, new
StringReader(queryText));
+ Vector v = new Vector();
+ org.apache.lucene.analysis.Token t;
+ int positionCount = 0;
+ boolean severalTokensAtSamePosition = false;
+
+ while (true) {
+ try {
+ t = source.next();
+ }
+ catch (IOException e) {
+ t = null;
+ }
+ if (t == null)
+ break;
+ v.addElement(t);
+ if (t.getPositionIncrement() == 1)
+ positionCount++;
+ else
+ severalTokensAtSamePosition = true;
+ }
+ try {
+ source.close();
+ }
+ catch (IOException e) {
+ // ignore
+ }
+
+ if (v.size() == 0)
+ return null;
+ else if (v.size() == 1) {
+ t = (org.apache.lucene.analysis.Token) v.elementAt(0);
+ return new TermQuery(new Term(field, t.termText()));
+ } else {
+ if (severalTokensAtSamePosition) {
+ if (positionCount == 1) {
+ // no phrase query:
+ BooleanQuery q = new BooleanQuery();
+ for (int i = 0; i < v.size(); i++) {
+ t = (org.apache.lucene.analysis.Token) v.elementAt(i);
+ TermQuery currentQuery = new TermQuery(
+ new Term(field, t.termText()));
+ q.add(currentQuery, BooleanClause.Occur.SHOULD);
+ }
+ return q;
+ }
+ else {
+ // phrase query:
+ MultiPhraseQuery mpq = new MultiPhraseQuery();
+ List multiTerms = new ArrayList();
+ for (int i = 0; i < v.size(); i++) {
+ t = (org.apache.lucene.analysis.Token) v.elementAt(i);
+ if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) {
+ mpq.add((Term[])multiTerms.toArray(new Term[0]));
+ multiTerms.clear();
+ }
+ multiTerms.add(new Term(field, t.termText()));
+ }
+ mpq.add((Term[])multiTerms.toArray(new Term[0]));
+ return mpq;
+ }
+ }
+ else {
+ PhraseQuery q = new PhraseQuery();
+ q.setSlop(phraseSlop);
+ for (int i = 0; i < v.size(); i++) {
+ q.add(new Term(field, ((org.apache.lucene.analysis.Token)
+ v.elementAt(i)).termText()));
+
+ }
+ return q;
+ }
+ }
+ }
+
+ /**
+ * Base implementation delegates to {@link #getFieldQuery(String,String)}.
+ * This method may be overridden, for example, to return
+ * a SpanNearQuery instead of a PhraseQuery.
+ *
+ * @exception ParseException throw in overridden method to disallow
+ */
+ protected Query getFieldQuery(String field, String queryText, int slop)
+ throws ParseException {
+ Query query = getFieldQuery(field, queryText);
+
+ if (query instanceof PhraseQuery) {
+ ((PhraseQuery) query).setSlop(slop);
+ }
+ if (query instanceof MultiPhraseQuery) {
+ ((MultiPhraseQuery) query).setSlop(slop);
+ }
+
+ return query;
+ }
+
+ /**
+ * Builds a RangeQuery between two end points of <code>field</code>.
+ * Both end points are lower-cased first when
+ * <code>lowercaseExpandedTerms</code> is set. If both then parse as
+ * short-format dates for the configured locale (leniently), they are
+ * replaced by their day-resolution DateTools strings; otherwise they are
+ * used as given.
+ *
+ * @param field name of the field the range applies to
+ * @param part1 lower end point
+ * @param part2 upper end point
+ * @param inclusive whether the end points themselves are part of the range
+ * @return the resulting RangeQuery
+ * @exception ParseException throw in overridden method to disallow
+ */
+ protected Query getRangeQuery(String field,
+                               String part1,
+                               String part2,
+                               boolean inclusive) throws ParseException
+ {
+   String lower = lowercaseExpandedTerms ? part1.toLowerCase() : part1;
+   String upper = lowercaseExpandedTerms ? part2.toLowerCase() : part2;
+
+   try {
+     DateFormat shortFormat = DateFormat.getDateInstance(DateFormat.SHORT, locale);
+     shortFormat.setLenient(true);
+     Date from = shortFormat.parse(lower);
+     Date to = shortFormat.parse(upper);
+     // Reached only when both end points parsed as dates.
+     lower = DateTools.dateToString(from, DateTools.Resolution.DAY);
+     upper = DateTools.dateToString(to, DateTools.Resolution.DAY);
+   }
+   catch (Exception e) {
+     // Deliberately ignored: the end points are not dates, keep them as text.
+   }
+
+   Term lowerTerm = new Term(field, lower);
+   Term upperTerm = new Term(field, upper);
+   return new RangeQuery(lowerTerm, upperTerm, inclusive);
+ }
+
+ /**
+ * Factory method for generating query, given a set of clauses.
+ * By default creates a boolean query composed of clauses passed in.
+ *
+ * Can be overridden by extending classes, to modify query being
+ * returned.
+ *
+ * @param clauses Vector that contains [EMAIL PROTECTED] BooleanClause}
instances
+ * to join.
+ *
+ * @return Resulting [EMAIL PROTECTED] Query} object.
+ * @exception ParseException throw in overridden method to disallow
+ */
+ protected Query getBooleanQuery(Vector clauses) throws ParseException
+ {
+ if (clauses == null || clauses.size() == 0)
+ return null;
+
+ BooleanQuery query = new BooleanQuery();
+ for (int i = 0; i < clauses.size(); i++) {
+ query.add((BooleanClause)clauses.elementAt(i));
+ }
+ return query;
+ }
+
+ /**
+ * Factory method for generating a query. Called when parser
+ * parses an input term token that contains one or more wildcard
+ * characters (? and *), but is not a prefix term token (one
+ * that has just a single * character at the end)
+ *<p>
+ * Depending on settings, prefix term may be lower-cased
+ * automatically. It will not go through the default Analyzer,
+ * however, since normal Analyzers are unlikely to work properly
+ * with wildcard templates.
+ *<p>
+ * Can be overridden by extending classes, to provide custom handling for
+ * wildcard queries, which may be necessary due to missing analyzer calls.
+ *
+ * @param field Name of the field query will use.
+ * @param termStr Term token that contains one or more wild card
+ * characters (? or *), but is not simple prefix term
+ *
+ * @return Resulting [EMAIL PROTECTED] Query} built for the term
+ * @exception ParseException throw in overridden method to disallow
+ */
+ protected Query getWildcardQuery(String field, String termStr) throws
ParseException
+ {
+ if (lowercaseExpandedTerms) {
+ termStr = termStr.toLowerCase();
+ }
+ Term t = new Term(field, termStr);
+ return new WildcardQuery(t);
+ }
+
+ /**
+ * Factory method for generating a query (similar to
+ * [EMAIL PROTECTED] #getWildcardQuery}). Called when parser parses an input
term
+ * token that uses prefix notation; that is, contains a single '*' wildcard
+ * character as its last character. Since this is a special case
+ * of generic wildcard term, and such a query can be optimized easily,
+ * this usually results in a different query object.
+ *<p>
+ * Depending on settings, a prefix term may be lower-cased
+ * automatically. It will not go through the default Analyzer,
+ * however, since normal Analyzers are unlikely to work properly
+ * with wildcard templates.
+ *<p>
+ * Can be overridden by extending classes, to provide custom handling for
+ * wild card queries, which may be necessary due to missing analyzer calls.
+ *
+ * @param field Name of the field query will use.
+ * @param termStr Term token to use for building term for the query
+ * (<b>without</b> trailing '*' character!)
+ *
+ * @return Resulting [EMAIL PROTECTED] Query} built for the term
+ * @exception ParseException throw in overridden method to disallow
+ */
+ protected Query getPrefixQuery(String field, String termStr) throws
ParseException
+ {
+ if (lowercaseExpandedTerms) {
+ termStr = termStr.toLowerCase();
+ }
+ Term t = new Term(field, termStr);
+ return new PrefixQuery(t);
+ }
+
+ /**
+ * Factory method for generating a query (similar to
+ * [EMAIL PROTECTED] #getWildcardQuery}). Called when parser parses
+ * an input term token that has the fuzzy suffix (~) appended.
+ *
+ * @param field Name of the field query will use.
+ * @param termStr Term token to use for building term for the query
+ *
+ * @return Resulting [EMAIL PROTECTED] Query} built for the term
+ * @exception ParseException throw in overridden method to disallow
+ */
+ protected Query getFuzzyQuery(String field, String termStr, float
minSimilarity) throws ParseException
+ {
+ if (lowercaseExpandedTerms) {
+ termStr = termStr.toLowerCase();
+ }
+ Term t = new Term(field, termStr);
+ return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
+ }
+
+ /**
+ * Returns a copy of <code>input</code> with the escape characters removed.
+ * A backslash escapes the character that follows it: the backslash itself
+ * is dropped and the following character is kept literally, so a double
+ * backslash yields a single backslash. A trailing, unpaired backslash is
+ * dropped.
+ * <p>
+ * The escape state is tracked explicitly instead of peeking at the
+ * previous source character. Peeking mis-handles runs of backslashes:
+ * four backslashes used to yield three instead of two, because the third
+ * one was treated as escaped by the (already consumed) second one.
+ *
+ * @param input text that may contain backslash escapes
+ * @return the text with one level of escaping removed
+ */
+ private String discardEscapeChar(String input) {
+   char[] source = input.toCharArray();
+   char[] unescaped = new char[source.length];
+   int length = 0;
+   // true while the previous character was a backslash that has not yet
+   // been paired with the character it escapes
+   boolean escaped = false;
+   for (int i = 0; i < source.length; i++) {
+     char c = source[i];
+     if (escaped) {
+       // escaped character (possibly a backslash itself): keep literally
+       unescaped[length++] = c;
+       escaped = false;
+     } else if (c == '\\') {
+       // start of an escape sequence: drop the backslash
+       escaped = true;
+     } else {
+       unescaped[length++] = c;
+     }
+   }
+   return new String(unescaped, 0, length);
+ }
+
+ /**
+ * Returns a copy of <code>s</code> in which every character that the
+ * query parser treats as syntax is preceded by a <code>\</code>, so the
+ * result is parsed as literal text.
+ *
+ * @param s text to escape
+ * @return the escaped text
+ */
+ public static String escape(String s) {
+   // NOTE: keep this list in sync with _ESCAPED_CHAR!
+   final String special = "\\+-!():^[]\"{}~*?";
+   final int len = s.length();
+   StringBuffer escaped = new StringBuffer();
+   for (int pos = 0; pos < len; pos++) {
+     final char ch = s.charAt(pos);
+     if (special.indexOf(ch) >= 0) {
+       escaped.append('\\');
+     }
+     escaped.append(ch);
+   }
+   return escaped.toString();
+ }
+
+ /**
+ * Command line tool to test PrecedenceQueryParser, using
+ * {@link org.apache.lucene.analysis.SimpleAnalyzer}.
+ * Parses the first argument against the default field "field" and prints
+ * the resulting query. With no arguments it prints the usage line and
+ * exits.
+ * Usage:<br>
+ * <code>java org.apache.lucene.queryParser.precedence.PrecedenceQueryParser &lt;input&gt;</code>
+ *
+ * @param args command line arguments; <code>args[0]</code> is the query text
+ * @exception Exception if the query text cannot be parsed
+ */
+ public static void main(String[] args) throws Exception {
+   if (args.length == 0) {
+     // Name the class that actually owns this main(), not the classic QueryParser.
+     System.out.println("Usage: java org.apache.lucene.queryParser.precedence.PrecedenceQueryParser <input>");
+     System.exit(0);
+   }
+   PrecedenceQueryParser qp = new PrecedenceQueryParser("field",
+       new org.apache.lucene.analysis.SimpleAnalyzer());
+   Query q = qp.parse(args[0]);
+   System.out.println(q.toString("field"));
+ }
+
+// * Query ::= ( Clause )*
+// * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
+ final public int Conjunction() throws ParseException { // Generated production: consumes an optional AND or OR token; returns CONJ_AND, CONJ_OR, or CONJ_NONE when neither is next.
+ int ret = CONJ_NONE;
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { // kind of the next token; fetched through jj_ntk() when not cached
+ case AND:
+ case OR:
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case AND:
+ jj_consume_token(AND);
+ ret = CONJ_AND;
+ break;
+ case OR:
+ jj_consume_token(OR);
+ ret = CONJ_OR;
+ break;
+ default:
+ jj_la1[0] = jj_gen; // remember that choice point 0 failed at this generation (read by generateParseException)
+ jj_consume_token(-1);
+ throw new ParseException();
+ }
+ break;
+ default:
+ jj_la1[1] = jj_gen; // no conjunction present: not an error, ret stays CONJ_NONE
+ ;
+ }
+ {if (true) return ret;} // if(true) keeps the compiler from rejecting the following throw as unreachable
+ throw new Error("Missing return statement in function");
+ }
+
+ final public int Modifier() throws ParseException { // Generated production: consumes an optional PLUS, MINUS or NOT token; returns MOD_REQ, MOD_NOT, or MOD_NONE when none is next.
+ int ret = MOD_NONE;
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { // kind of the next token; fetched through jj_ntk() when not cached
+ case NOT:
+ case PLUS:
+ case MINUS:
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case PLUS:
+ jj_consume_token(PLUS);
+ ret = MOD_REQ;
+ break;
+ case MINUS:
+ jj_consume_token(MINUS);
+ ret = MOD_NOT; // MINUS and NOT both map to MOD_NOT
+ break;
+ case NOT:
+ jj_consume_token(NOT);
+ ret = MOD_NOT;
+ break;
+ default:
+ jj_la1[2] = jj_gen; // remember that choice point 2 failed at this generation (read by generateParseException)
+ jj_consume_token(-1);
+ throw new ParseException();
+ }
+ break;
+ default:
+ jj_la1[3] = jj_gen; // no modifier present: not an error, ret stays MOD_NONE
+ ;
+ }
+ {if (true) return ret;} // if(true) keeps the compiler from rejecting the following throw as unreachable
+ throw new Error("Missing return statement in function");
+ }
+
+ final public Query Query(String field) throws ParseException { // Generated production: parses one or more ( Modifier orExpression ) clauses for the given default field.
+ Vector clauses = new Vector();
+ int modifier;
+ Query q, firstQuery=null;
+ modifier = Modifier();
+ q = orExpression(field);
+ addClause(clauses, CONJ_NONE, modifier, q); // addClause is defined outside this section; juxtaposed clauses are passed with CONJ_NONE
+ firstQuery=q;
+ label_1:
+ while (true) { // keep going while the next token can start another clause
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case NOT:
+ case PLUS:
+ case MINUS:
+ case LPAREN:
+ case QUOTED:
+ case TERM:
+ case PREFIXTERM:
+ case WILDTERM:
+ case RANGEIN_START:
+ case RANGEEX_START:
+ case NUMBER:
+ ;
+ break;
+ default:
+ jj_la1[4] = jj_gen;
+ break label_1;
+ }
+ modifier = Modifier();
+ q = orExpression(field);
+ addClause(clauses, CONJ_NONE, modifier, q);
+ }
+ if (clauses.size() == 1 && firstQuery != null) // exactly one clause collected: return the first sub-query itself, unwrapped
+ {if (true) return firstQuery;}
+ else {
+ {if (true) return getBooleanQuery(clauses);} // may be null: getBooleanQuery returns null for an empty clause list
+ }
+ throw new Error("Missing return statement in function");
+ }
+
+ final public Query orExpression(String field) throws ParseException { // Generated production: andExpression ( OR Modifier andExpression )*
+ Vector clauses = new Vector();
+ Query q, firstQuery=null;
+ int modifier;
+ q = andExpression(field);
+ addClause(clauses, CONJ_NONE, MOD_NONE, q); // first operand carries no conjunction and no modifier
+ firstQuery=q;
+ label_2:
+ while (true) { // loop while the next token is OR
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case OR:
+ ;
+ break;
+ default:
+ jj_la1[5] = jj_gen;
+ break label_2;
+ }
+ jj_consume_token(OR);
+ modifier = Modifier();
+ q = andExpression(field);
+ addClause(clauses, CONJ_OR, modifier, q);
+ }
+ if (clauses.size() == 1 && firstQuery != null) // exactly one clause collected: return the operand itself, unwrapped
+ {if (true) return firstQuery;}
+ else {
+ {if (true) return getBooleanQuery(clauses);}
+ }
+ throw new Error("Missing return statement in function");
+ }
+
+ final public Query andExpression(String field) throws ParseException { // Generated production: Clause ( AND Modifier Clause )*
+ Vector clauses = new Vector();
+ Query q, firstQuery=null;
+ int modifier;
+ q = Clause(field);
+ addClause(clauses, CONJ_NONE, MOD_NONE, q); // first operand carries no conjunction and no modifier
+ firstQuery=q;
+ label_3:
+ while (true) { // loop while the next token is AND
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case AND:
+ ;
+ break;
+ default:
+ jj_la1[6] = jj_gen;
+ break label_3;
+ }
+ jj_consume_token(AND);
+ modifier = Modifier();
+ q = Clause(field);
+ addClause(clauses, CONJ_AND, modifier, q);
+ }
+ if (clauses.size() == 1 && firstQuery != null) // exactly one clause collected: return the operand itself, unwrapped
+ {if (true) return firstQuery;}
+ else {
+ {if (true) return getBooleanQuery(clauses);}
+ }
+ throw new Error("Missing return statement in function");
+ }
+
+ final public Query Clause(String field) throws ParseException { // Generated production: optional "TERM COLON" field prefix, then either a Term or a parenthesized Query with an optional ^NUMBER boost.
+ Query q;
+ Token fieldToken=null, boost=null;
+ if (jj_2_1(2)) { // two-token lookahead: is the input TERM followed by COLON?
+ fieldToken = jj_consume_token(TERM);
+ jj_consume_token(COLON);
+ field=discardEscapeChar(fieldToken.image); // explicit field name replaces the default for this clause
+ } else {
+ ;
+ }
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case QUOTED:
+ case TERM:
+ case PREFIXTERM:
+ case WILDTERM:
+ case RANGEIN_START:
+ case RANGEEX_START:
+ case NUMBER:
+ q = Term(field);
+ break;
+ case LPAREN:
+ jj_consume_token(LPAREN);
+ q = Query(field);
+ jj_consume_token(RPAREN);
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case CARAT:
+ jj_consume_token(CARAT);
+ boost = jj_consume_token(NUMBER);
+ break;
+ default:
+ jj_la1[7] = jj_gen;
+ ;
+ }
+ break;
+ default:
+ jj_la1[8] = jj_gen;
+ jj_consume_token(-1);
+ throw new ParseException();
+ }
+ if (boost != null) { // boost is only ever set in the parenthesized branch above
+ float f = (float)1.0;
+ try {
+ f = Float.valueOf(boost.image).floatValue();
+ q.setBoost(f);
+ } catch (Exception ignored) { } // swallows a malformed number and also the NullPointerException raised when q is null; the boost is then skipped
+ }
+ {if (true) return q;}
+ throw new Error("Missing return statement in function");
+ }
+
+ final public Query Term(String field) throws ParseException { // Generated production: parses one term-level construct (plain/prefix/wildcard/fuzzy term, inclusive or exclusive range, or quoted phrase) with optional ^NUMBER boost and builds its query through the get*Query factory methods.
+ Token term, boost=null, fuzzySlop=null, goop1, goop2;
+ boolean prefix = false;
+ boolean wildcard = false;
+ boolean fuzzy = false;
+ boolean rangein = false;
+ Query q;
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case TERM:
+ case PREFIXTERM:
+ case WILDTERM:
+ case NUMBER: // ---- single term: TERM | PREFIXTERM | WILDTERM | NUMBER ----
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case TERM:
+ term = jj_consume_token(TERM);
+ break;
+ case PREFIXTERM:
+ term = jj_consume_token(PREFIXTERM);
+ prefix=true;
+ break;
+ case WILDTERM:
+ term = jj_consume_token(WILDTERM);
+ wildcard=true;
+ break;
+ case NUMBER:
+ term = jj_consume_token(NUMBER);
+ break;
+ default:
+ jj_la1[9] = jj_gen;
+ jj_consume_token(-1);
+ throw new ParseException();
+ }
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { // optional fuzzy suffix directly after the term
+ case FUZZY_SLOP:
+ fuzzySlop = jj_consume_token(FUZZY_SLOP);
+ fuzzy=true;
+ break;
+ default:
+ jj_la1[10] = jj_gen;
+ ;
+ }
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) { // optional ^NUMBER boost, itself optionally followed by a fuzzy suffix
+ case CARAT:
+ jj_consume_token(CARAT);
+ boost = jj_consume_token(NUMBER);
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case FUZZY_SLOP:
+ fuzzySlop = jj_consume_token(FUZZY_SLOP);
+ fuzzy=true;
+ break;
+ default:
+ jj_la1[11] = jj_gen;
+ ;
+ }
+ break;
+ default:
+ jj_la1[12] = jj_gen;
+ ;
+ }
+ String termImage=discardEscapeChar(term.image);
+ if (wildcard) { // precedence of the flags: wildcard, then prefix, then fuzzy, else plain field query
+ q = getWildcardQuery(field, termImage);
+ } else if (prefix) {
+ q = getPrefixQuery(field,
+ discardEscapeChar(term.image.substring
+ (0, term.image.length()-1))); // drop the last character of the token image before unescaping
+ } else if (fuzzy) {
+ float fms = fuzzyMinSim;
+ try {
+ fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue(); // skip the first character of the FUZZY_SLOP image (presumably '~' - confirm against the grammar)
+ } catch (Exception ignored) { } // no parsable number after the first character: keep the default fuzzyMinSim
+ if(fms < 0.0f || fms > 1.0f){
+ {if (true) throw new ParseException("Minimum similarity for a
FuzzyQuery has to be between 0.0f and 1.0f !");}
+ }
+ q = getFuzzyQuery(field, termImage, fms);
+ } else {
+ q = getFieldQuery(field, termImage);
+ }
+ break;
+ case RANGEIN_START: // ---- inclusive range: start, end point, optional TO, end point, end ----
+ jj_consume_token(RANGEIN_START);
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case RANGEIN_GOOP:
+ goop1 = jj_consume_token(RANGEIN_GOOP);
+ break;
+ case RANGEIN_QUOTED:
+ goop1 = jj_consume_token(RANGEIN_QUOTED);
+ break;
+ default:
+ jj_la1[13] = jj_gen;
+ jj_consume_token(-1);
+ throw new ParseException();
+ }
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case RANGEIN_TO:
+ jj_consume_token(RANGEIN_TO);
+ break;
+ default:
+ jj_la1[14] = jj_gen;
+ ;
+ }
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case RANGEIN_GOOP:
+ goop2 = jj_consume_token(RANGEIN_GOOP);
+ break;
+ case RANGEIN_QUOTED:
+ goop2 = jj_consume_token(RANGEIN_QUOTED);
+ break;
+ default:
+ jj_la1[15] = jj_gen;
+ jj_consume_token(-1);
+ throw new ParseException();
+ }
+ jj_consume_token(RANGEIN_END);
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case CARAT:
+ jj_consume_token(CARAT);
+ boost = jj_consume_token(NUMBER);
+ break;
+ default:
+ jj_la1[16] = jj_gen;
+ ;
+ }
+ if (goop1.kind == RANGEIN_QUOTED) { // quoted end point: strip first and last character; otherwise remove escapes
+ goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+ } else {
+ goop1.image = discardEscapeChar(goop1.image);
+ }
+ if (goop2.kind == RANGEIN_QUOTED) {
+ goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+ } else {
+ goop2.image = discardEscapeChar(goop2.image);
+ }
+ q = getRangeQuery(field, goop1.image, goop2.image, true);
+ break;
+ case RANGEEX_START: // ---- exclusive range: same shape as the inclusive one, built with inclusive=false ----
+ jj_consume_token(RANGEEX_START);
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case RANGEEX_GOOP:
+ goop1 = jj_consume_token(RANGEEX_GOOP);
+ break;
+ case RANGEEX_QUOTED:
+ goop1 = jj_consume_token(RANGEEX_QUOTED);
+ break;
+ default:
+ jj_la1[17] = jj_gen;
+ jj_consume_token(-1);
+ throw new ParseException();
+ }
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case RANGEEX_TO:
+ jj_consume_token(RANGEEX_TO);
+ break;
+ default:
+ jj_la1[18] = jj_gen;
+ ;
+ }
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case RANGEEX_GOOP:
+ goop2 = jj_consume_token(RANGEEX_GOOP);
+ break;
+ case RANGEEX_QUOTED:
+ goop2 = jj_consume_token(RANGEEX_QUOTED);
+ break;
+ default:
+ jj_la1[19] = jj_gen;
+ jj_consume_token(-1);
+ throw new ParseException();
+ }
+ jj_consume_token(RANGEEX_END);
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case CARAT:
+ jj_consume_token(CARAT);
+ boost = jj_consume_token(NUMBER);
+ break;
+ default:
+ jj_la1[20] = jj_gen;
+ ;
+ }
+ if (goop1.kind == RANGEEX_QUOTED) {
+ goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+ } else {
+ goop1.image = discardEscapeChar(goop1.image);
+ }
+ if (goop2.kind == RANGEEX_QUOTED) {
+ goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+ } else {
+ goop2.image = discardEscapeChar(goop2.image);
+ }
+
+ q = getRangeQuery(field, goop1.image, goop2.image, false);
+ break;
+ case QUOTED: // ---- quoted phrase with optional slop suffix and optional boost ----
+ term = jj_consume_token(QUOTED);
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case FUZZY_SLOP:
+ fuzzySlop = jj_consume_token(FUZZY_SLOP);
+ break;
+ default:
+ jj_la1[21] = jj_gen;
+ ;
+ }
+ switch ((jj_ntk==-1)?jj_ntk():jj_ntk) {
+ case CARAT:
+ jj_consume_token(CARAT);
+ boost = jj_consume_token(NUMBER);
+ break;
+ default:
+ jj_la1[22] = jj_gen;
+ ;
+ }
+ int s = phraseSlop; // default slop, used when no suffix is given or it does not parse
+
+ if (fuzzySlop != null) {
+ try {
+ s = Float.valueOf(fuzzySlop.image.substring(1)).intValue(); // parsed as a float, then truncated to int
+ }
+ catch (Exception ignored) { }
+ }
+ q = getFieldQuery(field, term.image.substring(1,
term.image.length()-1), s);
+ break;
+ default:
+ jj_la1[23] = jj_gen;
+ jj_consume_token(-1);
+ throw new ParseException();
+ }
+ if (boost != null) {
+ float f = (float) 1.0;
+ try {
+ f = Float.valueOf(boost.image).floatValue();
+ }
+ catch (Exception ignored) {
+ /* Should this be handled somehow? (defaults to "no boost", if
+ * boost number is invalid)
+ */
+ }
+
+ // avoid boosting null queries, such as those caused by stop words
+ if (q != null) {
+ q.setBoost(f);
+ }
+ }
+ {if (true) return q;}
+ throw new Error("Missing return statement in function");
+ }
+
+ final private boolean jj_2_1(int xla) { // Syntactic lookahead #1: returns true when the upcoming tokens are TERM COLON, scanning at most xla tokens without consuming any.
+ jj_la = xla; jj_lastpos = jj_scanpos = token; // start scanning from the current token with a budget of xla tokens
+ try { return !jj_3_1(); } // jj_3_1 returns true on mismatch, hence the negation
+ catch(LookaheadSuccess ls) { return true; } // thrown by jj_scan_token when the budget is used up with every token matching
+ finally { jj_save(0, xla); } // always record this attempt; jj_rescan_token replays it when building error messages
+ }
+
+ final private boolean jj_3_1() {
+   // Lookahead body for "TERM COLON": true means mismatch. Scanning stops
+   // at the first token that does not match, so COLON is only scanned
+   // after TERM has matched.
+   return jj_scan_token(TERM) || jj_scan_token(COLON);
+ }
+
+ public PrecedenceQueryParserTokenManager token_source; // supplies tokens through getNextToken()
+ public Token token, jj_nt; // token: most recently consumed token; jj_nt: value of token.next as last read by jj_ntk()
+ private int jj_ntk; // cached kind of the next token, or -1 when it has to be re-read
+ private Token jj_scanpos, jj_lastpos; // lookahead cursor, and the furthest token reached by jj_scan_token
+ private int jj_la; // remaining lookahead budget while scanning
+ public boolean lookingAhead = false; // read by getToken() to choose its starting token
+ private boolean jj_semLA; // not referenced anywhere in this section
+ private int jj_gen; // generation counter, incremented for every consumed token
+ final private int[] jj_la1 = new int[24]; // per choice point: the jj_gen value at which it last took its default branch
+ static private int[] jj_la1_0; // per choice point: bit mask of expected token kinds (bit j stands for kind j)
+ static {
+ jj_la1_0();
+ }
+ private static void jj_la1_0() { // fills the expected-token masks; entry i belongs to jj_la1[i]
+ jj_la1_0 = new int[]
{0x180,0x180,0xe00,0xe00,0xfb1e00,0x100,0x80,0x8000,0xfb1000,0x9a0000,0x40000,0x40000,0x8000,0xc000000,0x1000000,0xc000000,0x8000,0xc0000000,0x10000000,0xc0000000,0x8000,0x40000,0x8000,0xfb0000,};
+ }
+ final private JJCalls[] jj_2_rtns = new JJCalls[1]; // one chain of recorded lookahead attempts per lookahead routine (only jj_2_1 here)
+ private boolean jj_rescan = false; // true only while jj_rescan_token() is running
+ private int jj_gc = 0; // consumed-token counter that triggers the periodic cleanup in jj_consume_token
+
+ /**
+ * Creates a parser that reads its tokens from a new token manager over
+ * <code>stream</code>, with all parser state reset.
+ *
+ * @param stream character source handed to the token manager
+ */
+ public PrecedenceQueryParser(CharStream stream) {
+   token_source = new PrecedenceQueryParserTokenManager(stream);
+   token = new Token();
+   jj_ntk = -1;
+   jj_gen = 0;
+   // jj_la1 has exactly 24 entries, one per choice point
+   java.util.Arrays.fill(jj_la1, -1);
+   int slot = 0;
+   while (slot < jj_2_rtns.length) {
+     jj_2_rtns[slot] = new JJCalls();
+     slot++;
+   }
+ }
+
+ public void ReInit(CharStream stream) {
+ token_source.ReInit(stream);
+ token = new Token();
+ jj_ntk = -1;
+ jj_gen = 0;
+ for (int i = 0; i < 24; i++) jj_la1[i] = -1;
+ for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
+ }
+
+ /**
+ * Creates a parser that reads its tokens from the given token manager,
+ * with all parser state reset.
+ *
+ * @param tm token manager to read tokens from
+ */
+ public PrecedenceQueryParser(PrecedenceQueryParserTokenManager tm) {
+   token_source = tm;
+   token = new Token();
+   jj_ntk = -1;
+   jj_gen = 0;
+   // jj_la1 has exactly 24 entries, one per choice point
+   java.util.Arrays.fill(jj_la1, -1);
+   int slot = 0;
+   while (slot < jj_2_rtns.length) {
+     jj_2_rtns[slot] = new JJCalls();
+     slot++;
+   }
+ }
+
+ public void ReInit(PrecedenceQueryParserTokenManager tm) {
+ token_source = tm;
+ token = new Token();
+ jj_ntk = -1;
+ jj_gen = 0;
+ for (int i = 0; i < 24; i++) jj_la1[i] = -1;
+ for (int i = 0; i < jj_2_rtns.length; i++) jj_2_rtns[i] = new JJCalls();
+ }
+
+ final private Token jj_consume_token(int kind) throws ParseException { // Advances to the next token and returns it if it has the requested kind; otherwise restores the position and throws the ParseException built by generateParseException().
+ Token oldToken;
+ if ((oldToken = token).next != null) token = token.next; // next token already buffered in the linked list (e.g. by lookahead)
+ else token = token.next = token_source.getNextToken(); // otherwise pull a fresh token and link it in
+ jj_ntk = -1; // invalidate the cached next-token kind
+ if (token.kind == kind) {
+ jj_gen++;
+ if (++jj_gc > 100) { // periodic cleanup: clear the token reference held by lookahead records older than the current generation
+ jj_gc = 0;
+ for (int i = 0; i < jj_2_rtns.length; i++) {
+ JJCalls c = jj_2_rtns[i];
+ while (c != null) {
+ if (c.gen < jj_gen) c.first = null;
+ c = c.next;
+ }
+ }
+ }
+ return token;
+ }
+ token = oldToken; // mismatch: step back so the error is reported relative to the last good token
+ jj_kind = kind; // remembered so generateParseException() lists this kind as expected
+ throw generateParseException();
+ }
+
+ static private final class LookaheadSuccess extends java.lang.Error { } // control-flow signal: thrown by jj_scan_token and caught in jj_2_1 to end a lookahead early with success
+ final private LookaheadSuccess jj_ls = new LookaheadSuccess(); // single shared instance, so no new object is allocated per throw
+ final private boolean jj_scan_token(int kind) { // Lookahead step: advances the scan cursor by one token without consuming; returns true on MISMATCH, false on match, and throws jj_ls when the lookahead budget is used up on a match.
+ if (jj_scanpos == jj_lastpos) { // scanning new ground: spend one unit of lookahead budget
+ jj_la--;
+ if (jj_scanpos.next == null) {
+ jj_lastpos = jj_scanpos = jj_scanpos.next =
token_source.getNextToken();
+ } else {
+ jj_lastpos = jj_scanpos = jj_scanpos.next;
+ }
+ } else { // re-walking tokens already scanned: budget untouched
+ jj_scanpos = jj_scanpos.next;
+ }
+ if (jj_rescan) { // only during jj_rescan_token(): record the expected kind at its distance from the current token
+ int i = 0; Token tok = token;
+ while (tok != null && tok != jj_scanpos) { i++; tok = tok.next; }
+ if (tok != null) jj_add_error_token(kind, i);
+ }
+ if (jj_scanpos.kind != kind) return true;
+ if (jj_la == 0 && jj_scanpos == jj_lastpos) throw jj_ls; // budget exhausted and everything matched so far
+ return false;
+ }
+
+ final public Token getNextToken() {
+ if (token.next != null) token = token.next;
+ else token = token.next = token_source.getNextToken();
+ jj_ntk = -1;
+ jj_gen++;
+ return token;
+ }
+
+ final public Token getToken(int index) {
+ Token t = lookingAhead ? jj_scanpos : token;
+ for (int i = 0; i < index; i++) {
+ if (t.next != null) t = t.next;
+ else t = t.next = token_source.getNextToken();
+ }
+ return t;
+ }
+
+ final private int jj_ntk() {
+ if ((jj_nt=token.next) == null)
+ return (jj_ntk = (token.next=token_source.getNextToken()).kind);
+ else
+ return (jj_ntk = jj_nt.kind);
+ }
+
+ private java.util.Vector jj_expentries = new java.util.Vector(); // expected-token sequences (int[] of token kinds) collected for the next ParseException
+ private int[] jj_expentry; // sequence currently being assembled
+ private int jj_kind = -1; // kind requested by the failing jj_consume_token call, or -1 if none is pending
+ private int[] jj_lasttokens = new int[100]; // kinds seen along the lookahead path being replayed; positions >= 100 are ignored
+ private int jj_endpos; // number of valid entries in jj_lasttokens
+
+ private void jj_add_error_token(int kind, int pos) { // Error-reporting helper: extends the token-kind sequence being replayed, or flushes the finished sequence into jj_expentries (without duplicates) and starts a new one at pos.
+ if (pos >= 100) return; // jj_lasttokens holds at most 100 entries
+ if (pos == jj_endpos + 1) { // next position in the current sequence: append
+ jj_lasttokens[jj_endpos++] = kind;
+ } else if (jj_endpos != 0) { // sequence break: store what has been collected so far
+ jj_expentry = new int[jj_endpos];
+ for (int i = 0; i < jj_endpos; i++) {
+ jj_expentry[i] = jj_lasttokens[i];
+ }
+ boolean exists = false;
+ for (java.util.Enumeration e = jj_expentries.elements();
e.hasMoreElements();) {
+ int[] oldentry = (int[])(e.nextElement());
+ if (oldentry.length == jj_expentry.length) { // same length: compare element by element
+ exists = true;
+ for (int i = 0; i < jj_expentry.length; i++) {
+ if (oldentry[i] != jj_expentry[i]) {
+ exists = false;
+ break;
+ }
+ }
+ if (exists) break;
+ }
+ }
+ if (!exists) jj_expentries.addElement(jj_expentry);
+ if (pos != 0) jj_lasttokens[(jj_endpos = pos) - 1] = kind; // restart the sequence so that it ends at pos with this kind
+ }
+ }
+
+ public ParseException generateParseException() { // Builds a ParseException for the current token, listing every token kind (and lookahead sequence) that would have been accepted at this point.
+ jj_expentries.removeAllElements();
+ boolean[] la1tokens = new boolean[32]; // one flag per token kind 0..31
+ for (int i = 0; i < 32; i++) {
+ la1tokens[i] = false;
+ }
+ if (jj_kind >= 0) { // kind requested by the jj_consume_token call that failed
+ la1tokens[jj_kind] = true;
+ jj_kind = -1;
+ }
+ for (int i = 0; i < 24; i++) {
+ if (jj_la1[i] == jj_gen) { // choice point i fell to its default branch at the current generation
+ for (int j = 0; j < 32; j++) {
+ if ((jj_la1_0[i] & (1<<j)) != 0) { // bit j set: kind j was acceptable at that choice point
+ la1tokens[j] = true;
+ }
+ }
+ }
+ }
+ for (int i = 0; i < 32; i++) {
+ if (la1tokens[i]) { // each expected kind becomes a one-element sequence
+ jj_expentry = new int[1];
+ jj_expentry[0] = i;
+ jj_expentries.addElement(jj_expentry);
+ }
+ }
+ jj_endpos = 0;
+ jj_rescan_token(); // replay recorded lookaheads to collect multi-token expectations
+ jj_add_error_token(0, 0); // flush the last sequence collected during the replay
+ int[][] exptokseq = new int[jj_expentries.size()][];
+ for (int i = 0; i < jj_expentries.size(); i++) {
+ exptokseq[i] = (int[])jj_expentries.elementAt(i);
+ }
+ return new ParseException(token, exptokseq, tokenImage);
+ }
+
+ final public void enable_tracing() { // intentionally empty: this parser contains no tracing code, so there is nothing to switch on
+ }
+
+ final public void disable_tracing() { // intentionally empty: this parser contains no tracing code, so there is nothing to switch off
+ }
+
+ final private void jj_rescan_token() { // Error-reporting helper: replays every recorded lookahead attempt that is still current, with jj_rescan set so that jj_scan_token reports the kinds it expects.
+ jj_rescan = true;
+ for (int i = 0; i < 1; i++) { // one iteration per lookahead routine; this grammar has exactly one
+ JJCalls p = jj_2_rtns[i];
+ do {
+ if (p.gen > jj_gen) { // record reaches past the current generation: restore its budget and start token, then rescan
+ jj_la = p.arg; jj_lastpos = jj_scanpos = p.first;
+ switch (i) {
+ case 0: jj_3_1(); break;
+ }
+ }
+ p = p.next;
+ } while (p != null);
+ }
+ jj_rescan = false;
+ }
+
+ final private void jj_save(int index, int xla) { // Records the lookahead attempt just made by routine 'index' so that jj_rescan_token can replay it.
+ JJCalls p = jj_2_rtns[index];
+ while (p.gen > jj_gen) { // skip records that are still current; append a new record if the chain runs out
+ if (p.next == null) { p = p.next = new JJCalls(); break; }
+ p = p.next;
+ }
+ p.gen = jj_gen + xla - jj_la; p.first = token; p.arg = xla; // gen = current generation plus the budget spent (xla - jj_la); first = start token; arg = budget granted
+ }
+
+ static final class JJCalls { // one recorded lookahead attempt; records for the same routine form a singly linked list
+ int gen; // set by jj_save to jj_gen + xla - jj_la; compared against jj_gen to decide whether the record is still current
+ Token first; // token the lookahead started from
+ int arg; // lookahead budget (xla) the attempt was started with
+ JJCalls next; // next record in the chain, or null
+ }
+
+}