Added: lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj?view=auto&rev=156431 ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj (added) +++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParser.jj Mon Mar 7 08:08:02 2005 @@ -0,0 +1,910 @@ +/** + * Copyright 2004 The Apache Software Foundation + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +options { + STATIC=false; + JAVA_UNICODE_ESCAPE=true; + USER_CHAR_STREAM=true; +} + +PARSER_BEGIN(PrecedenceQueryParser) + +package org.apache.lucene.queryParser.precedence; + +import java.util.Vector; +import java.io.*; +import java.text.*; +import java.util.*; +import org.apache.lucene.index.Term; +import org.apache.lucene.analysis.*; +import org.apache.lucene.document.*; +import org.apache.lucene.search.*; +import org.apache.lucene.util.Parameter; + +/** + * This class is generated by JavaCC. The only method that clients should need + * to call is {@link #parse(String)} or {@link #parse(String, String, Analyzer)}. + * + * The syntax for query strings is as follows: + * A Query is a series of clauses. 
+ * A clause may be prefixed by: + * <ul> + * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating + * that the clause is required or prohibited respectively; or + * <li> a term followed by a colon, indicating the field to be searched. + * This enables one to construct queries which search multiple fields. + * </ul> + * + * A clause may be either: + * <ul> + * <li> a term, indicating all the documents that contain this term; or + * <li> a nested query, enclosed in parentheses. Note that this may be used + * with a <code>+</code>/<code>-</code> prefix to require any of a set of + * terms. + * </ul> + * + * Thus, in BNF, the query grammar is: + * <pre> + * Query ::= ( Clause )* + * Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" ) + * </pre> + * + * <p> + * Examples of appropriately formatted queries can be found in the <a + * href="http://jakarta.apache.org/lucene/docs/queryparsersyntax.html">query syntax + * documentation</a>. + * </p> + * + * @author Brian Goetz + * @author Peter Halacsy + * @author Tatu Saloranta + */ + +public class PrecedenceQueryParser { + + private static final int CONJ_NONE = 0; + private static final int CONJ_AND = 1; + private static final int CONJ_OR = 2; + + private static final int MOD_NONE = 0; + private static final int MOD_NOT = 10; + private static final int MOD_REQ = 11; + + // make it possible to call setDefaultOperator() without accessing + // the nested class: + public static final Operator AND_OPERATOR = Operator.AND; + public static final Operator OR_OPERATOR = Operator.OR; + + /** The actual operator that parser uses to combine query terms */ + private Operator operator = OR_OPERATOR; + + boolean lowercaseExpandedTerms = true; + + Analyzer analyzer; + String field; + int phraseSlop = 0; + float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity; + int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength; + Locale locale = Locale.getDefault(); + + static final class Operator extends Parameter { + 
private Operator(String name) { + super(name); + } + static final Operator OR = new Operator("OR"); + static final Operator AND = new Operator("AND"); + } + + /** Parses a query string, returning a [EMAIL PROTECTED] org.apache.lucene.search.Query}. + * @param query the query string to be parsed. + * @param field the default field for query terms. + * @param analyzer used to find terms in the query text. + * @throws ParseException if the parsing fails + */ + static public Query parse(String query, String field, Analyzer analyzer) + throws ParseException { + PrecedenceQueryParser parser = new PrecedenceQueryParser(field, analyzer); + return parser.parse(query); + } + + /** Constructs a query parser. + * @param f the default field for query terms. + * @param a used to find terms in the query text. + */ + public PrecedenceQueryParser(String f, Analyzer a) { + this(new FastCharStream(new StringReader(""))); + analyzer = a; + field = f; + } + + /** Parses a query string, returning a [EMAIL PROTECTED] org.apache.lucene.search.Query}. + * @param query the query string to be parsed. + * @throws ParseException if the parsing fails + */ + public Query parse(String expression) throws ParseException { + ReInit(new FastCharStream(new StringReader(expression))); + try { + Query query = Query(field); + return (query != null) ? query : new BooleanQuery(); + } + catch (TokenMgrError tme) { + throw new ParseException(tme.getMessage()); + } + catch (BooleanQuery.TooManyClauses tmc) { + throw new ParseException("Too many boolean clauses"); + } + } + + /** + * @return Returns the analyzer. + */ + public Analyzer getAnalyzer() { + return analyzer; + } + + /** + * @return Returns the field. + */ + public String getField() { + return field; + } + + /** + * Get the minimal similarity for fuzzy queries. + */ + public float getFuzzyMinSim() { + return fuzzyMinSim; + } + + /** + * Set the minimum similarity for fuzzy queries. + * Default is 0.5f. 
+ */ + public void setFuzzyMinSim(float fuzzyMinSim) { + this.fuzzyMinSim = fuzzyMinSim; + } + + /** + * Get the prefix length for fuzzy queries. + * @return Returns the fuzzyPrefixLength. + */ + public int getFuzzyPrefixLength() { + return fuzzyPrefixLength; + } + + /** + * Set the prefix length for fuzzy queries. Default is 0. + * @param fuzzyPrefixLength The fuzzyPrefixLength to set. + */ + public void setFuzzyPrefixLength(int fuzzyPrefixLength) { + this.fuzzyPrefixLength = fuzzyPrefixLength; + } + + /** + * Sets the default slop for phrases. If zero, then exact phrase matches + * are required. Default value is zero. + */ + public void setPhraseSlop(int phraseSlop) { + this.phraseSlop = phraseSlop; + } + + /** + * Gets the default slop for phrases. + */ + public int getPhraseSlop() { + return phraseSlop; + } + + /** + * Sets the boolean operator of the QueryParser. + * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers + * are considered optional: for example <code>capital of Hungary</code> is equal to + * <code>capital OR of OR Hungary</code>.<br/> + * In <code>AND_OPERATOR</code> mode terms are considered to be in conjuction: the + * above mentioned query is parsed as <code>capital AND of AND Hungary</code> + */ + public void setDefaultOperator(Operator op) { + this.operator = op; + } + + /** + * Gets implicit operator setting, which will be either AND_OPERATOR + * or OR_OPERATOR. + */ + public Operator getDefaultOperator() { + return operator; + } + + /** + * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically + * lower-cased or not. Default is <code>true</code>. + */ + public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) { + this.lowercaseExpandedTerms = lowercaseExpandedTerms; + } + + /** + * @see #setLowercaseExpandedTerms(boolean) + */ + public boolean getLowercaseExpandedTerms() { + return lowercaseExpandedTerms; + } + + /** + * Set locale used by date range parsing. 
+ */ + public void setLocale(Locale locale) { + this.locale = locale; + } + + /** + * Returns current locale, allowing access by subclasses. + */ + public Locale getLocale() { + return locale; + } + + protected void addClause(Vector clauses, int conj, int modifier, Query q) { + boolean required, prohibited; + + // If this term is introduced by AND, make the preceding term required, + // unless it's already prohibited + if (clauses.size() > 0 && conj == CONJ_AND) { + BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); + if (!c.isProhibited()) + c.setOccur(BooleanClause.Occur.MUST); + } + + if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) { + // If this term is introduced by OR, make the preceding term optional, + // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) + // notice if the input is a OR b, first term is parsed as required; without + // this modification a OR b would parsed as +a OR b + BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1); + if (!c.isProhibited()) + c.setOccur(BooleanClause.Occur.SHOULD); + } + + // We might have been passed a null query; the term might have been + // filtered away by the analyzer. + if (q == null) + return; + + if (operator == OR_OPERATOR) { + // We set REQUIRED if we're introduced by AND or +; PROHIBITED if + // introduced by NOT or -; make sure not to set both. 
+ prohibited = (modifier == MOD_NOT); + required = (modifier == MOD_REQ); + if (conj == CONJ_AND && !prohibited) { + required = true; + } + } else { + // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED + // if not PROHIBITED and not introduced by OR + prohibited = (modifier == MOD_NOT); + required = (!prohibited && conj != CONJ_OR); + } + if (required && !prohibited) + clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST)); + else if (!required && !prohibited) + clauses.addElement(new BooleanClause(q, BooleanClause.Occur.SHOULD)); + else if (!required && prohibited) + clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST_NOT)); + else + throw new RuntimeException("Clause cannot be both required and prohibited"); + } + + /** + * @exception ParseException throw in overridden method to disallow + */ + protected Query getFieldQuery(String field, String queryText) throws ParseException { + // Use the analyzer to get all the tokens, and then build a TermQuery, + // PhraseQuery, or nothing based on the term count + + TokenStream source = analyzer.tokenStream(field, new StringReader(queryText)); + Vector v = new Vector(); + org.apache.lucene.analysis.Token t; + int positionCount = 0; + boolean severalTokensAtSamePosition = false; + + while (true) { + try { + t = source.next(); + } + catch (IOException e) { + t = null; + } + if (t == null) + break; + v.addElement(t); + if (t.getPositionIncrement() == 1) + positionCount++; + else + severalTokensAtSamePosition = true; + } + try { + source.close(); + } + catch (IOException e) { + // ignore + } + + if (v.size() == 0) + return null; + else if (v.size() == 1) { + t = (org.apache.lucene.analysis.Token) v.elementAt(0); + return new TermQuery(new Term(field, t.termText())); + } else { + if (severalTokensAtSamePosition) { + if (positionCount == 1) { + // no phrase query: + BooleanQuery q = new BooleanQuery(); + for (int i = 0; i < v.size(); i++) { + t = (org.apache.lucene.analysis.Token) 
v.elementAt(i); + TermQuery currentQuery = new TermQuery( + new Term(field, t.termText())); + q.add(currentQuery, BooleanClause.Occur.SHOULD); + } + return q; + } + else { + // phrase query: + MultiPhraseQuery mpq = new MultiPhraseQuery(); + List multiTerms = new ArrayList(); + for (int i = 0; i < v.size(); i++) { + t = (org.apache.lucene.analysis.Token) v.elementAt(i); + if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) { + mpq.add((Term[])multiTerms.toArray(new Term[0])); + multiTerms.clear(); + } + multiTerms.add(new Term(field, t.termText())); + } + mpq.add((Term[])multiTerms.toArray(new Term[0])); + return mpq; + } + } + else { + PhraseQuery q = new PhraseQuery(); + q.setSlop(phraseSlop); + for (int i = 0; i < v.size(); i++) { + q.add(new Term(field, ((org.apache.lucene.analysis.Token) + v.elementAt(i)).termText())); + + } + return q; + } + } + } + + /** + * Base implementation delegates to [EMAIL PROTECTED] #getFieldQuery(String,String)}. + * This method may be overridden, for example, to return + * a SpanNearQuery instead of a PhraseQuery. 
+ * + * @exception ParseException throw in overridden method to disallow + */ + protected Query getFieldQuery(String field, String queryText, int slop) + throws ParseException { + Query query = getFieldQuery(field, queryText); + + if (query instanceof PhraseQuery) { + ((PhraseQuery) query).setSlop(slop); + } + if (query instanceof MultiPhraseQuery) { + ((MultiPhraseQuery) query).setSlop(slop); + } + + return query; + } + + /** + * @exception ParseException throw in overridden method to disallow + */ + protected Query getRangeQuery(String field, + String part1, + String part2, + boolean inclusive) throws ParseException + { + if (lowercaseExpandedTerms) { + part1 = part1.toLowerCase(); + part2 = part2.toLowerCase(); + } + try { + DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale); + df.setLenient(true); + Date d1 = df.parse(part1); + Date d2 = df.parse(part2); + part1 = DateTools.dateToString(d1, DateTools.Resolution.DAY); + part2 = DateTools.dateToString(d2, DateTools.Resolution.DAY); + } + catch (Exception e) { } + + return new RangeQuery(new Term(field, part1), + new Term(field, part2), + inclusive); + } + + /** + * Factory method for generating query, given a set of clauses. + * By default creates a boolean query composed of clauses passed in. + * + * Can be overridden by extending classes, to modify query being + * returned. + * + * @param clauses Vector that contains [EMAIL PROTECTED] BooleanClause} instances + * to join. + * + * @return Resulting [EMAIL PROTECTED] Query} object. + * @exception ParseException throw in overridden method to disallow + */ + protected Query getBooleanQuery(Vector clauses) throws ParseException + { + if (clauses == null || clauses.size() == 0) + return null; + + BooleanQuery query = new BooleanQuery(); + for (int i = 0; i < clauses.size(); i++) { + query.add((BooleanClause)clauses.elementAt(i)); + } + return query; + } + + /** + * Factory method for generating a query. 
Called when parser + * parses an input term token that contains one or more wildcard + * characters (? and *), but is not a prefix term token (one + * that has just a single * character at the end) + *<p> + * Depending on settings, prefix term may be lower-cased + * automatically. It will not go through the default Analyzer, + * however, since normal Analyzers are unlikely to work properly + * with wildcard templates. + *<p> + * Can be overridden by extending classes, to provide custom handling for + * wildcard queries, which may be necessary due to missing analyzer calls. + * + * @param field Name of the field query will use. + * @param termStr Term token that contains one or more wild card + * characters (? or *), but is not simple prefix term + * + * @return Resulting [EMAIL PROTECTED] Query} built for the term + * @exception ParseException throw in overridden method to disallow + */ + protected Query getWildcardQuery(String field, String termStr) throws ParseException + { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } + Term t = new Term(field, termStr); + return new WildcardQuery(t); + } + + /** + * Factory method for generating a query (similar to + * [EMAIL PROTECTED] #getWildcardQuery}). Called when parser parses an input term + * token that uses prefix notation; that is, contains a single '*' wildcard + * character as its last character. Since this is a special case + * of generic wildcard term, and such a query can be optimized easily, + * this usually results in a different query object. + *<p> + * Depending on settings, a prefix term may be lower-cased + * automatically. It will not go through the default Analyzer, + * however, since normal Analyzers are unlikely to work properly + * with wildcard templates. + *<p> + * Can be overridden by extending classes, to provide custom handling for + * wild card queries, which may be necessary due to missing analyzer calls. + * + * @param field Name of the field query will use. 
+ * @param termStr Term token to use for building term for the query + * (<b>without</b> trailing '*' character!) + * + * @return Resulting [EMAIL PROTECTED] Query} built for the term + * @exception ParseException throw in overridden method to disallow + */ + protected Query getPrefixQuery(String field, String termStr) throws ParseException + { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } + Term t = new Term(field, termStr); + return new PrefixQuery(t); + } + + /** + * Factory method for generating a query (similar to + * [EMAIL PROTECTED] #getWildcardQuery}). Called when parser parses + * an input term token that has the fuzzy suffix (~) appended. + * + * @param field Name of the field query will use. + * @param termStr Term token to use for building term for the query + * + * @return Resulting [EMAIL PROTECTED] Query} built for the term + * @exception ParseException throw in overridden method to disallow + */ + protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException + { + if (lowercaseExpandedTerms) { + termStr = termStr.toLowerCase(); + } + Term t = new Term(field, termStr); + return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength); + } + + /** + * Returns a String where the escape char has been + * removed, or kept only once if there was a double escape. + */ + private String discardEscapeChar(String input) { + char[] caSource = input.toCharArray(); + char[] caDest = new char[caSource.length]; + int j = 0; + for (int i = 0; i < caSource.length; i++) { + if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) { + caDest[j++]=caSource[i]; + } + } + return new String(caDest, 0, j); + } + + /** + * Returns a String where those characters that QueryParser + * expects to be escaped are escaped by a preceding <code>\</code>. 
+ */ + public static String escape(String s) { + StringBuffer sb = new StringBuffer(); + for (int i = 0; i < s.length(); i++) { + char c = s.charAt(i); + // NOTE: keep this in sync with _ESCAPED_CHAR below! + if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' + || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' + || c == '*' || c == '?') { + sb.append('\\'); + } + sb.append(c); + } + return sb.toString(); + } + + /** + * Command line tool to test QueryParser, using [EMAIL PROTECTED] org.apache.lucene.analysis.SimpleAnalyzer}. + * Usage:<br> + * <code>java org.apache.lucene.queryParser.QueryParser <input></code> + */ + public static void main(String[] args) throws Exception { + if (args.length == 0) { + System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>"); + System.exit(0); + } + PrecedenceQueryParser qp = new PrecedenceQueryParser("field", + new org.apache.lucene.analysis.SimpleAnalyzer()); + Query q = qp.parse(args[0]); + System.out.println(q.toString("field")); + } +} + +PARSER_END(PrecedenceQueryParser) + +/* ***************** */ +/* Token Definitions */ +/* ***************** */ + +<*> TOKEN : { + <#_NUM_CHAR: ["0"-"9"] > +// NOTE: keep this in sync with escape(String) above! +| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^", + "[", "]", "\"", "{", "}", "~", "*", "?" ] > +| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^", + "[", "]", "\"", "{", "}", "~", "*", "?" ] + | <_ESCAPED_CHAR> ) > +| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) > +| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") > +} + +<DEFAULT, RangeIn, RangeEx> SKIP : { + <<_WHITESPACE>> +} + +// OG: to support prefix queries: +// http://nagoya.apache.org/bugzilla/show_bug.cgi?id=12137 +// Change from: +// | <WILDTERM: <_TERM_START_CHAR> +// (<_TERM_CHAR> | ( [ "*", "?" ] ))* > +// To: +// +// | <WILDTERM: (<_TERM_CHAR> | ( [ "*", "?" 
] ))* >
+
+<DEFAULT> TOKEN : {
+  <AND:       ("AND" | "&&") >
+| <OR:        ("OR" | "||") >
+| <NOT:       ("NOT" | "!") >
+| <PLUS:      "+" >
+| <MINUS:     "-" >
+| <LPAREN:    "(" >
+| <RPAREN:    ")" >
+| <COLON:     ":" >
+| <CARAT:     "^" > : Boost
+| <QUOTED:     "\"" (~["\""])+ "\"">
+| <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)*  >
+| <FUZZY_SLOP:     "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
+| <PREFIXTERM:  <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
+| <WILDTERM:  <_TERM_START_CHAR>
+              (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
+| <RANGEIN_START: "[" > : RangeIn
+| <RANGEEX_START: "{" > : RangeEx
+}
+
+<Boost> TOKEN : {
+<NUMBER:    (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
+}
+
+<RangeIn> TOKEN : {
+<RANGEIN_TO: "TO">
+| <RANGEIN_END: "]"> : DEFAULT
+| <RANGEIN_QUOTED: "\"" (~["\""])+ "\"">
+| <RANGEIN_GOOP: (~[ " ", "]" ])+ >
+}
+
+<RangeEx> TOKEN : {
+<RANGEEX_TO: "TO">
+| <RANGEEX_END: "}"> : DEFAULT
+| <RANGEEX_QUOTED: "\"" (~["\""])+ "\"">
+| <RANGEEX_GOOP: (~[ " ", "}" ])+ >
+}
+
+// *   Query  ::= ( Clause )*
+// *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
+
+// Optional AND / OR keyword; yields CONJ_AND, CONJ_OR or CONJ_NONE.
+int Conjunction() : {
+  int ret = CONJ_NONE;
+}
+{
+  [
+    <AND> { ret = CONJ_AND; }
+    | <OR>  { ret = CONJ_OR; }
+  ]
+  { return ret; }
+}
+
+// Optional "+", "-" or NOT prefix; yields MOD_REQ, MOD_NOT or MOD_NONE.
+int Modifier() : {
+  int ret = MOD_NONE;
+}
+{
+  [
+     <PLUS> { ret = MOD_REQ; }
+     | <MINUS> { ret = MOD_NOT; }
+     | <NOT> { ret = MOD_NOT; }
+  ]
+  { return ret; }
+}
+
+// Lowest-precedence level: a sequence of andExpressions, each optionally
+// preceded by OR and/or a modifier. A single unmodified clause is returned
+// as-is; everything else is combined through getBooleanQuery().
+Query Query(String field) :
+{
+  Vector clauses = new Vector();
+  int modifier;
+  Query q, firstQuery=null;
+  boolean orPresent = false;
+}
+{
+  modifier=Modifier() q=andExpression(field)
+  {
+    addClause(clauses, CONJ_NONE, modifier, q);
+    // Only an unmodified first clause may be returned bare below; returning
+    // it for "-a" or "+a" would silently drop the modifier.
+    if (modifier == MOD_NONE)
+      firstQuery=q;
+  }
+  (
+    [<OR> { orPresent=true; }] modifier=Modifier() q=andExpression(field)
+    {
+      addClause(clauses, orPresent ? CONJ_OR : CONJ_NONE, modifier, q);
+      // the OR belongs to this clause only; clear it for the next iteration
+      orPresent=false;
+    }
+  )*
+    {
+      if (clauses.size() == 1 && firstQuery != null)
+        return firstQuery;
+      else {
+        return getBooleanQuery(clauses);
+      }
+    }
+}
+
+/*
+Query orExpression(String field) :
+{
+  Vector clauses = new Vector();
+  Query q, firstQuery=null;
+  int modifier;
+}
+{
+  q=andExpression(field)
+  {
+    addClause(clauses, CONJ_NONE, MOD_NONE, q);
+    firstQuery=q;
+  }
+  (
+    <OR> modifier=Modifier() q=andExpression(field)
+    { addClause(clauses, CONJ_OR, modifier, q); }
+  )*
+    {
+      if (clauses.size() == 1 && firstQuery != null)
+        return firstQuery;
+      else {
+        return getBooleanQuery(clauses);
+      }
+    }
+}
+*/
+
+// Higher-precedence level: clauses joined by explicit AND. The first clause
+// never carries a modifier here, so returning it bare loses nothing.
+Query andExpression(String field) :
+{
+  Vector clauses = new Vector();
+  Query q, firstQuery=null;
+  int modifier;
+}
+{
+  q=Clause(field)
+  {
+    addClause(clauses, CONJ_NONE, MOD_NONE, q);
+    firstQuery=q;
+  }
+  (
+    <AND> modifier=Modifier() q=Clause(field)
+    { addClause(clauses, CONJ_AND, modifier, q); }
+  )*
+    {
+      if (clauses.size() == 1 && firstQuery != null)
+        return firstQuery;
+      else {
+        return getBooleanQuery(clauses);
+      }
+    }
+}
+
+// A single term, or a parenthesised sub-query with an optional boost, either
+// of which may be prefixed by "field:" to override the current field.
+Query Clause(String field) : {
+  Query q;
+  Token fieldToken=null, boost=null;
+}
+{
+  [
+    LOOKAHEAD(2)
+    fieldToken=<TERM> <COLON> {
+      field=discardEscapeChar(fieldToken.image);
+    }
+  ]
+
+  (
+   q=Term(field)
+   | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?
+
+  )
+    {
+      // A group can yield null (see getBooleanQuery), so test for it
+      // explicitly instead of relying on a swallowed NullPointerException.
+      if (boost != null && q != null) {
+        try {
+          q.setBoost(Float.valueOf(boost.image).floatValue());
+        } catch (Exception ignored) {
+          // unparsable boost: leave the query's boost untouched
+        }
+      }
+      return q;
+    }
+}
+
+
+// A term, prefix term, wildcard term, inclusive/exclusive range or quoted
+// phrase, with optional fuzzy/slop suffix and optional boost.
+Query Term(String field) : {
+  Token term, boost=null, fuzzySlop=null, goop1, goop2;
+  boolean prefix = false;
+  boolean wildcard = false;
+  boolean fuzzy = false;
+  Query q;
+}
+{
+  (
+     (
+       term=<TERM>
+       | term=<PREFIXTERM> { prefix=true; }
+       | term=<WILDTERM> { wildcard=true; }
+       | term=<NUMBER>
+     )
+     [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
+     [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
+     {
+       String termImage=discardEscapeChar(term.image);
+       if (wildcard) {
+         q = getWildcardQuery(field, termImage);
+       } else if (prefix) {
+         // strip the trailing '*' before unescaping
+         q = getPrefixQuery(field,
+           discardEscapeChar(term.image.substring
+          (0, term.image.length()-1)));
+       } else if (fuzzy) {
+         float fms = fuzzyMinSim;
+         try {
+           // a bare "~" has no number: keep the configured default
+           fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
+         } catch (Exception ignored) { }
+         if(fms < 0.0f || fms > 1.0f){
+           throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
+         }
+         q = getFuzzyQuery(field, termImage, fms);
+       } else {
+         q = getFieldQuery(field, termImage);
+       }
+     }
+     | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
+         [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
+         <RANGEIN_END> )
+       [ <CARAT> boost=<NUMBER> ]
+        {
+          // quoted bounds lose their quotes, unquoted ones their escapes
+          if (goop1.kind == RANGEIN_QUOTED) {
+            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+          } else {
+            goop1.image = discardEscapeChar(goop1.image);
+          }
+          if (goop2.kind == RANGEIN_QUOTED) {
+            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+          } else {
+            goop2.image = discardEscapeChar(goop2.image);
+          }
+          q = getRangeQuery(field, goop1.image, goop2.image, true);
+        }
+     | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
+         [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
+         <RANGEEX_END> )
+       [ <CARAT> boost=<NUMBER> ]
+        {
+          if (goop1.kind == RANGEEX_QUOTED) {
+            goop1.image = goop1.image.substring(1, goop1.image.length()-1);
+          } else {
+            goop1.image = discardEscapeChar(goop1.image);
+          }
+          if (goop2.kind == RANGEEX_QUOTED) {
+            goop2.image = goop2.image.substring(1, goop2.image.length()-1);
+          } else {
+            goop2.image = discardEscapeChar(goop2.image);
+          }
+
+          q = getRangeQuery(field, goop1.image, goop2.image, false);
+        }
+     | term=<QUOTED>
+       [ fuzzySlop=<FUZZY_SLOP> ]
+       [ <CARAT> boost=<NUMBER> ]
+       {
+         int s = phraseSlop;
+
+         if (fuzzySlop != null) {
+           try {
+             s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
+           }
+           catch (Exception ignored) { }
+         }
+         q = getFieldQuery(field, term.image.substring(1, term.image.length()-1), s);
+       }
+  )
+  {
+    if (boost != null) {
+      float f = (float) 1.0;
+      try {
+        f = Float.valueOf(boost.image).floatValue();
+      }
+      catch (Exception ignored) {
+        /* Should this be handled somehow? (defaults to "no boost", if
+         * boost number is invalid)
+         */
+      }
+
+      // avoid boosting null queries, such as those caused by stop words
+      if (q != null) {
+        q.setBoost(f);
+      }
+    }
+    return q;
+  }
+}
Added: lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserConstants.java URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserConstants.java?view=auto&rev=156431 ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserConstants.java (added) +++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserConstants.java Mon Mar 7 08:08:02 2005 @@ -0,0 +1,78 @@ +/* Generated By:JavaCC: Do not edit this line. PrecedenceQueryParserConstants.java */ +package org.apache.lucene.queryParser.precedence; + +public interface PrecedenceQueryParserConstants { + + int EOF = 0; + int _NUM_CHAR = 1; + int _ESCAPED_CHAR = 2; + int _TERM_START_CHAR = 3; + int _TERM_CHAR = 4; + int _WHITESPACE = 5; + int AND = 7; + int OR = 8; + int NOT = 9; + int PLUS = 10; + int MINUS = 11; + int LPAREN = 12; + int RPAREN = 13; + int COLON = 14; + int CARAT = 15; + int QUOTED = 16; + int TERM = 17; + int FUZZY_SLOP = 18; + int PREFIXTERM = 19; + int WILDTERM = 20; + int RANGEIN_START = 21; + int RANGEEX_START = 22; + int NUMBER = 23; + int RANGEIN_TO = 24; + int RANGEIN_END = 25; + int RANGEIN_QUOTED = 26; + int RANGEIN_GOOP = 27; + int RANGEEX_TO = 28; + int RANGEEX_END = 29; + int RANGEEX_QUOTED = 30; + int RANGEEX_GOOP = 31; + + int Boost = 0; + int RangeEx = 1; + int RangeIn = 2; + int DEFAULT = 3; + + String[] tokenImage = { + "<EOF>", + "<_NUM_CHAR>", + "<_ESCAPED_CHAR>", + "<_TERM_START_CHAR>", + "<_TERM_CHAR>", + "<_WHITESPACE>", + "<token of kind 6>", + "<AND>", + "<OR>", + "<NOT>", + "\"+\"", + "\"-\"", + "\"(\"", + "\")\"", + "\":\"", + "\"^\"", + "<QUOTED>", + "<TERM>", + "<FUZZY_SLOP>", + "<PREFIXTERM>", + "<WILDTERM>", + "\"[\"", + "\"{\"", + "<NUMBER>", + "\"TO\"", + "\"]\"", + "<RANGEIN_QUOTED>", + "<RANGEIN_GOOP>", + "\"TO\"", + 
"\"}\"", + "<RANGEEX_QUOTED>", + "<RANGEEX_GOOP>", + }; + +} Added: lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java?view=auto&rev=156431 ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java (added) +++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/PrecedenceQueryParserTokenManager.java Mon Mar 7 08:08:02 2005 @@ -0,0 +1,1055 @@ +/* Generated By:JavaCC: Do not edit this line. PrecedenceQueryParserTokenManager.java */ +package org.apache.lucene.queryParser.precedence; +import java.util.Vector; +import java.io.*; +import java.text.*; +import java.util.*; +import org.apache.lucene.index.Term; +import org.apache.lucene.analysis.*; +import org.apache.lucene.document.*; +import org.apache.lucene.search.*; +import org.apache.lucene.util.Parameter; + +public class PrecedenceQueryParserTokenManager implements PrecedenceQueryParserConstants +{ + public java.io.PrintStream debugStream = System.out; + public void setDebugStream(java.io.PrintStream ds) { debugStream = ds; } +private final int jjStopStringLiteralDfa_3(int pos, long active0) +{ + switch (pos) + { + default : + return -1; + } +} +private final int jjStartNfa_3(int pos, long active0) +{ + return jjMoveNfa_3(jjStopStringLiteralDfa_3(pos, active0), pos + 1); +} +private final int jjStopAtPos(int pos, int kind) +{ + jjmatchedKind = kind; + jjmatchedPos = pos; + return pos + 1; +} +private final int jjStartNfaWithStates_3(int pos, int kind, int state) +{ + jjmatchedKind = kind; + jjmatchedPos = pos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return pos + 1; } + return jjMoveNfa_3(state, pos + 1); +} +private final int 
jjMoveStringLiteralDfa0_3() +{ + switch(curChar) + { + case 40: + return jjStopAtPos(0, 12); + case 41: + return jjStopAtPos(0, 13); + case 43: + return jjStopAtPos(0, 10); + case 45: + return jjStopAtPos(0, 11); + case 58: + return jjStopAtPos(0, 14); + case 91: + return jjStopAtPos(0, 21); + case 94: + return jjStopAtPos(0, 15); + case 123: + return jjStopAtPos(0, 22); + default : + return jjMoveNfa_3(0, 0); + } +} +private final void jjCheckNAdd(int state) +{ + if (jjrounds[state] != jjround) + { + jjstateSet[jjnewStateCnt++] = state; + jjrounds[state] = jjround; + } +} +private final void jjAddStates(int start, int end) +{ + do { + jjstateSet[jjnewStateCnt++] = jjnextStates[start]; + } while (start++ != end); +} +private final void jjCheckNAddTwoStates(int state1, int state2) +{ + jjCheckNAdd(state1); + jjCheckNAdd(state2); +} +private final void jjCheckNAddStates(int start, int end) +{ + do { + jjCheckNAdd(jjnextStates[start]); + } while (start++ != end); +} +private final void jjCheckNAddStates(int start) +{ + jjCheckNAdd(jjnextStates[start]); + jjCheckNAdd(jjnextStates[start + 1]); +} +static final long[] jjbitVec0 = { + 0xfffffffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL +}; +static final long[] jjbitVec2 = { + 0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL +}; +private final int jjMoveNfa_3(int startState, int curPos) +{ + int[] nextStates; + int startsAt = 0; + jjnewStateCnt = 33; + int i = 1; + jjstateSet[0] = startState; + int j, kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + if ((0x7bffd0f8ffffd9ffL & l) != 0L) + { + if (kind > 17) + kind = 17; + jjCheckNAddStates(0, 6); + } + else if ((0x100002600L & l) != 0L) + { + if (kind > 6) + kind = 6; + } + else if (curChar == 34) + jjCheckNAdd(15); + else if (curChar == 33) + { + if (kind > 9) + kind = 9; + } + if (curChar == 
38) + jjstateSet[jjnewStateCnt++] = 4; + break; + case 4: + if (curChar == 38 && kind > 7) + kind = 7; + break; + case 5: + if (curChar == 38) + jjstateSet[jjnewStateCnt++] = 4; + break; + case 13: + if (curChar == 33 && kind > 9) + kind = 9; + break; + case 14: + if (curChar == 34) + jjCheckNAdd(15); + break; + case 15: + if ((0xfffffffbffffffffL & l) != 0L) + jjCheckNAddTwoStates(15, 16); + break; + case 16: + if (curChar == 34 && kind > 16) + kind = 16; + break; + case 18: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 18) + kind = 18; + jjAddStates(7, 8); + break; + case 19: + if (curChar == 46) + jjCheckNAdd(20); + break; + case 20: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 18) + kind = 18; + jjCheckNAdd(20); + break; + case 21: + if ((0x7bffd0f8ffffd9ffL & l) == 0L) + break; + if (kind > 17) + kind = 17; + jjCheckNAddStates(0, 6); + break; + case 22: + if ((0x7bfff8f8ffffd9ffL & l) == 0L) + break; + if (kind > 17) + kind = 17; + jjCheckNAddTwoStates(22, 23); + break; + case 24: + if ((0x84002f0600000000L & l) == 0L) + break; + if (kind > 17) + kind = 17; + jjCheckNAddTwoStates(22, 23); + break; + case 25: + if ((0x7bfff8f8ffffd9ffL & l) != 0L) + jjCheckNAddStates(9, 11); + break; + case 26: + if (curChar == 42 && kind > 19) + kind = 19; + break; + case 28: + if ((0x84002f0600000000L & l) != 0L) + jjCheckNAddStates(9, 11); + break; + case 29: + if ((0xfbfffcf8ffffd9ffL & l) == 0L) + break; + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(29, 30); + break; + case 31: + if ((0x84002f0600000000L & l) == 0L) + break; + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(29, 30); + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) + { + long l = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + if ((0x97ffffff97ffffffL & l) != 0L) + { + if (kind > 17) + kind = 17; + jjCheckNAddStates(0, 6); + } + else if (curChar == 126) + { + if (kind > 18) + kind = 18; + 
jjstateSet[jjnewStateCnt++] = 18; + } + if (curChar == 92) + jjCheckNAddStates(12, 14); + else if (curChar == 78) + jjstateSet[jjnewStateCnt++] = 11; + else if (curChar == 124) + jjstateSet[jjnewStateCnt++] = 8; + else if (curChar == 79) + jjstateSet[jjnewStateCnt++] = 6; + else if (curChar == 65) + jjstateSet[jjnewStateCnt++] = 2; + break; + case 1: + if (curChar == 68 && kind > 7) + kind = 7; + break; + case 2: + if (curChar == 78) + jjstateSet[jjnewStateCnt++] = 1; + break; + case 3: + if (curChar == 65) + jjstateSet[jjnewStateCnt++] = 2; + break; + case 6: + if (curChar == 82 && kind > 8) + kind = 8; + break; + case 7: + if (curChar == 79) + jjstateSet[jjnewStateCnt++] = 6; + break; + case 8: + if (curChar == 124 && kind > 8) + kind = 8; + break; + case 9: + if (curChar == 124) + jjstateSet[jjnewStateCnt++] = 8; + break; + case 10: + if (curChar == 84 && kind > 9) + kind = 9; + break; + case 11: + if (curChar == 79) + jjstateSet[jjnewStateCnt++] = 10; + break; + case 12: + if (curChar == 78) + jjstateSet[jjnewStateCnt++] = 11; + break; + case 15: + jjAddStates(15, 16); + break; + case 17: + if (curChar != 126) + break; + if (kind > 18) + kind = 18; + jjstateSet[jjnewStateCnt++] = 18; + break; + case 21: + if ((0x97ffffff97ffffffL & l) == 0L) + break; + if (kind > 17) + kind = 17; + jjCheckNAddStates(0, 6); + break; + case 22: + if ((0x97ffffff97ffffffL & l) == 0L) + break; + if (kind > 17) + kind = 17; + jjCheckNAddTwoStates(22, 23); + break; + case 23: + if (curChar == 92) + jjCheckNAddTwoStates(24, 24); + break; + case 24: + if ((0x6800000078000000L & l) == 0L) + break; + if (kind > 17) + kind = 17; + jjCheckNAddTwoStates(22, 23); + break; + case 25: + if ((0x97ffffff97ffffffL & l) != 0L) + jjCheckNAddStates(9, 11); + break; + case 27: + if (curChar == 92) + jjCheckNAddTwoStates(28, 28); + break; + case 28: + if ((0x6800000078000000L & l) != 0L) + jjCheckNAddStates(9, 11); + break; + case 29: + if ((0x97ffffff97ffffffL & l) == 0L) + break; + if (kind > 20) + 
kind = 20; + jjCheckNAddTwoStates(29, 30); + break; + case 30: + if (curChar == 92) + jjCheckNAddTwoStates(31, 31); + break; + case 31: + if ((0x6800000078000000L & l) == 0L) + break; + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(29, 30); + break; + case 32: + if (curChar == 92) + jjCheckNAddStates(12, 14); + break; + default : break; + } + } while(i != startsAt); + } + else + { + int hiByte = (int)(curChar >> 8); + int i1 = hiByte >> 6; + long l1 = 1L << (hiByte & 077); + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + break; + if (kind > 17) + kind = 17; + jjCheckNAddStates(0, 6); + break; + case 15: + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) + jjAddStates(15, 16); + break; + case 22: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + break; + if (kind > 17) + kind = 17; + jjCheckNAddTwoStates(22, 23); + break; + case 25: + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) + jjCheckNAddStates(9, 11); + break; + case 29: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + break; + if (kind > 20) + kind = 20; + jjCheckNAddTwoStates(29, 30); + break; + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 33 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } +} +private final int jjStopStringLiteralDfa_1(int pos, long active0) +{ + switch (pos) + { + case 0: + if ((active0 & 0x10000000L) != 0L) + { + jjmatchedKind = 31; + return 4; + } + return -1; + default : + return -1; + } +} +private final int jjStartNfa_1(int pos, long active0) +{ + return jjMoveNfa_1(jjStopStringLiteralDfa_1(pos, active0), pos + 1); +} +private final int jjStartNfaWithStates_1(int pos, int kind, int state) +{ + jjmatchedKind = kind; + 
jjmatchedPos = pos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return pos + 1; } + return jjMoveNfa_1(state, pos + 1); +} +private final int jjMoveStringLiteralDfa0_1() +{ + switch(curChar) + { + case 84: + return jjMoveStringLiteralDfa1_1(0x10000000L); + case 125: + return jjStopAtPos(0, 29); + default : + return jjMoveNfa_1(0, 0); + } +} +private final int jjMoveStringLiteralDfa1_1(long active0) +{ + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_1(0, active0); + return 1; + } + switch(curChar) + { + case 79: + if ((active0 & 0x10000000L) != 0L) + return jjStartNfaWithStates_1(1, 28, 4); + break; + default : + break; + } + return jjStartNfa_1(0, active0); +} +private final int jjMoveNfa_1(int startState, int curPos) +{ + int[] nextStates; + int startsAt = 0; + jjnewStateCnt = 5; + int i = 1; + jjstateSet[0] = startState; + int j, kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + if ((0xfffffffeffffffffL & l) != 0L) + { + if (kind > 31) + kind = 31; + jjCheckNAdd(4); + } + if ((0x100002600L & l) != 0L) + { + if (kind > 6) + kind = 6; + } + else if (curChar == 34) + jjCheckNAdd(2); + break; + case 1: + if (curChar == 34) + jjCheckNAdd(2); + break; + case 2: + if ((0xfffffffbffffffffL & l) != 0L) + jjCheckNAddTwoStates(2, 3); + break; + case 3: + if (curChar == 34 && kind > 30) + kind = 30; + break; + case 4: + if ((0xfffffffeffffffffL & l) == 0L) + break; + if (kind > 31) + kind = 31; + jjCheckNAdd(4); + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) + { + long l = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + case 4: + if ((0xdfffffffffffffffL & l) == 0L) + break; + if (kind > 31) + kind = 31; + jjCheckNAdd(4); + break; + case 2: + jjAddStates(17, 18); + 
break; + default : break; + } + } while(i != startsAt); + } + else + { + int hiByte = (int)(curChar >> 8); + int i1 = hiByte >> 6; + long l1 = 1L << (hiByte & 077); + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + case 4: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + break; + if (kind > 31) + kind = 31; + jjCheckNAdd(4); + break; + case 2: + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) + jjAddStates(17, 18); + break; + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 5 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } +} +private final int jjMoveStringLiteralDfa0_0() +{ + return jjMoveNfa_0(0, 0); +} +private final int jjMoveNfa_0(int startState, int curPos) +{ + int[] nextStates; + int startsAt = 0; + jjnewStateCnt = 3; + int i = 1; + jjstateSet[0] = startState; + int j, kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 23) + kind = 23; + jjAddStates(19, 20); + break; + case 1: + if (curChar == 46) + jjCheckNAdd(2); + break; + case 2: + if ((0x3ff000000000000L & l) == 0L) + break; + if (kind > 23) + kind = 23; + jjCheckNAdd(2); + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) + { + long l = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + default : break; + } + } while(i != startsAt); + } + else + { + int hiByte = (int)(curChar >> 8); + int i1 = hiByte >> 6; + long l1 = 1L << (hiByte & 077); + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + MatchLoop: 
do + { + switch(jjstateSet[--i]) + { + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 3 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } +} +private final int jjStopStringLiteralDfa_2(int pos, long active0) +{ + switch (pos) + { + case 0: + if ((active0 & 0x1000000L) != 0L) + { + jjmatchedKind = 27; + return 4; + } + return -1; + default : + return -1; + } +} +private final int jjStartNfa_2(int pos, long active0) +{ + return jjMoveNfa_2(jjStopStringLiteralDfa_2(pos, active0), pos + 1); +} +private final int jjStartNfaWithStates_2(int pos, int kind, int state) +{ + jjmatchedKind = kind; + jjmatchedPos = pos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return pos + 1; } + return jjMoveNfa_2(state, pos + 1); +} +private final int jjMoveStringLiteralDfa0_2() +{ + switch(curChar) + { + case 84: + return jjMoveStringLiteralDfa1_2(0x1000000L); + case 93: + return jjStopAtPos(0, 25); + default : + return jjMoveNfa_2(0, 0); + } +} +private final int jjMoveStringLiteralDfa1_2(long active0) +{ + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { + jjStopStringLiteralDfa_2(0, active0); + return 1; + } + switch(curChar) + { + case 79: + if ((active0 & 0x1000000L) != 0L) + return jjStartNfaWithStates_2(1, 24, 4); + break; + default : + break; + } + return jjStartNfa_2(0, active0); +} +private final int jjMoveNfa_2(int startState, int curPos) +{ + int[] nextStates; + int startsAt = 0; + jjnewStateCnt = 5; + int i = 1; + jjstateSet[0] = startState; + int j, kind = 0x7fffffff; + for (;;) + { + if (++jjround == 0x7fffffff) + ReInitRounds(); + if (curChar < 64) + { + long l = 1L << curChar; + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + if 
((0xfffffffeffffffffL & l) != 0L) + { + if (kind > 27) + kind = 27; + jjCheckNAdd(4); + } + if ((0x100002600L & l) != 0L) + { + if (kind > 6) + kind = 6; + } + else if (curChar == 34) + jjCheckNAdd(2); + break; + case 1: + if (curChar == 34) + jjCheckNAdd(2); + break; + case 2: + if ((0xfffffffbffffffffL & l) != 0L) + jjCheckNAddTwoStates(2, 3); + break; + case 3: + if (curChar == 34 && kind > 26) + kind = 26; + break; + case 4: + if ((0xfffffffeffffffffL & l) == 0L) + break; + if (kind > 27) + kind = 27; + jjCheckNAdd(4); + break; + default : break; + } + } while(i != startsAt); + } + else if (curChar < 128) + { + long l = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + case 4: + if ((0xffffffffdfffffffL & l) == 0L) + break; + if (kind > 27) + kind = 27; + jjCheckNAdd(4); + break; + case 2: + jjAddStates(17, 18); + break; + default : break; + } + } while(i != startsAt); + } + else + { + int hiByte = (int)(curChar >> 8); + int i1 = hiByte >> 6; + long l1 = 1L << (hiByte & 077); + int i2 = (curChar & 0xff) >> 6; + long l2 = 1L << (curChar & 077); + MatchLoop: do + { + switch(jjstateSet[--i]) + { + case 0: + case 4: + if (!jjCanMove_0(hiByte, i1, i2, l1, l2)) + break; + if (kind > 27) + kind = 27; + jjCheckNAdd(4); + break; + case 2: + if (jjCanMove_0(hiByte, i1, i2, l1, l2)) + jjAddStates(17, 18); + break; + default : break; + } + } while(i != startsAt); + } + if (kind != 0x7fffffff) + { + jjmatchedKind = kind; + jjmatchedPos = curPos; + kind = 0x7fffffff; + } + ++curPos; + if ((i = jjnewStateCnt) == (startsAt = 5 - (jjnewStateCnt = startsAt))) + return curPos; + try { curChar = input_stream.readChar(); } + catch(java.io.IOException e) { return curPos; } + } +} +static final int[] jjnextStates = { + 22, 25, 26, 29, 30, 27, 23, 18, 19, 25, 26, 27, 24, 28, 31, 15, + 16, 2, 3, 0, 1, +}; +private static final boolean jjCanMove_0(int hiByte, int i1, int i2, long l1, long l2) +{ + switch(hiByte) + { + case 0: + return ((jjbitVec2[i2] & 
l2) != 0L); + default : + if ((jjbitVec0[i1] & l1) != 0L) + return true; + return false; + } +} +public static final String[] jjstrLiteralImages = { +"", null, null, null, null, null, null, null, null, null, "\53", "\55", "\50", +"\51", "\72", "\136", null, null, null, null, null, "\133", "\173", null, "\124\117", +"\135", null, null, "\124\117", "\175", null, null, }; +public static final String[] lexStateNames = { + "Boost", + "RangeEx", + "RangeIn", + "DEFAULT", +}; +public static final int[] jjnewLexState = { + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, 2, 1, 3, -1, + 3, -1, -1, -1, 3, -1, -1, +}; +static final long[] jjtoToken = { + 0xffffff81L, +}; +static final long[] jjtoSkip = { + 0x40L, +}; +protected CharStream input_stream; +private final int[] jjrounds = new int[33]; +private final int[] jjstateSet = new int[66]; +protected char curChar; +public PrecedenceQueryParserTokenManager(CharStream stream) +{ + input_stream = stream; +} +public PrecedenceQueryParserTokenManager(CharStream stream, int lexState) +{ + this(stream); + SwitchTo(lexState); +} +public void ReInit(CharStream stream) +{ + jjmatchedPos = jjnewStateCnt = 0; + curLexState = defaultLexState; + input_stream = stream; + ReInitRounds(); +} +private final void ReInitRounds() +{ + int i; + jjround = 0x80000001; + for (i = 33; i-- > 0;) + jjrounds[i] = 0x80000000; +} +public void ReInit(CharStream stream, int lexState) +{ + ReInit(stream); + SwitchTo(lexState); +} +public void SwitchTo(int lexState) +{ + if (lexState >= 4 || lexState < 0) + throw new TokenMgrError("Error: Ignoring invalid lexical state : " + lexState + ". State unchanged.", TokenMgrError.INVALID_LEXICAL_STATE); + else + curLexState = lexState; +} + +protected Token jjFillToken() +{ + Token t = Token.newToken(jjmatchedKind); + t.kind = jjmatchedKind; + String im = jjstrLiteralImages[jjmatchedKind]; + t.image = (im == null) ? 
input_stream.GetImage() : im; + t.beginLine = input_stream.getBeginLine(); + t.beginColumn = input_stream.getBeginColumn(); + t.endLine = input_stream.getEndLine(); + t.endColumn = input_stream.getEndColumn(); + return t; +} + +int curLexState = 3; +int defaultLexState = 3; +int jjnewStateCnt; +int jjround; +int jjmatchedPos; +int jjmatchedKind; + +public Token getNextToken() +{ + int kind; + Token specialToken = null; + Token matchedToken; + int curPos = 0; + + EOFLoop : + for (;;) + { + try + { + curChar = input_stream.BeginToken(); + } + catch(java.io.IOException e) + { + jjmatchedKind = 0; + matchedToken = jjFillToken(); + return matchedToken; + } + + switch(curLexState) + { + case 0: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_0(); + break; + case 1: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_1(); + break; + case 2: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_2(); + break; + case 3: + jjmatchedKind = 0x7fffffff; + jjmatchedPos = 0; + curPos = jjMoveStringLiteralDfa0_3(); + break; + } + if (jjmatchedKind != 0x7fffffff) + { + if (jjmatchedPos + 1 < curPos) + input_stream.backup(curPos - jjmatchedPos - 1); + if ((jjtoToken[jjmatchedKind >> 6] & (1L << (jjmatchedKind & 077))) != 0L) + { + matchedToken = jjFillToken(); + if (jjnewLexState[jjmatchedKind] != -1) + curLexState = jjnewLexState[jjmatchedKind]; + return matchedToken; + } + else + { + if (jjnewLexState[jjmatchedKind] != -1) + curLexState = jjnewLexState[jjmatchedKind]; + continue EOFLoop; + } + } + int error_line = input_stream.getEndLine(); + int error_column = input_stream.getEndColumn(); + String error_after = null; + boolean EOFSeen = false; + try { input_stream.readChar(); input_stream.backup(1); } + catch (java.io.IOException e1) { + EOFSeen = true; + error_after = curPos <= 1 ? 
"" : input_stream.GetImage(); + if (curChar == '\n' || curChar == '\r') { + error_line++; + error_column = 0; + } + else + error_column++; + } + if (!EOFSeen) { + input_stream.backup(1); + error_after = curPos <= 1 ? "" : input_stream.GetImage(); + } + throw new TokenMgrError(EOFSeen, curLexState, error_line, error_column, error_after, curChar, TokenMgrError.LEXICAL_ERROR); + } +} + +} Added: lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/Token.java URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/Token.java?view=auto&rev=156431 ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/Token.java (added) +++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/Token.java Mon Mar 7 08:08:02 2005 @@ -0,0 +1,81 @@ +/* Generated By:JavaCC: Do not edit this line. Token.java Version 3.0 */ +package org.apache.lucene.queryParser.precedence; + +/** + * Describes the input token stream. + */ + +public class Token { + + /** + * An integer that describes the kind of this token. This numbering + * system is determined by JavaCCParser, and a table of these numbers is + * stored in the file ...Constants.java. + */ + public int kind; + + /** + * beginLine and beginColumn describe the position of the first character + * of this token; endLine and endColumn describe the position of the + * last character of this token. + */ + public int beginLine, beginColumn, endLine, endColumn; + + /** + * The string image of the token. + */ + public String image; + + /** + * A reference to the next regular (non-special) token from the input + * stream. If this is the last token from the input stream, or if the + * token manager has not read tokens beyond this one, this field is + * set to null. This is true only if this token is also a regular + * token. 
Otherwise, see below for a description of the contents of + * this field. + */ + public Token next; + + /** + * This field is used to access special tokens that occur prior to this + * token, but after the immediately preceding regular (non-special) token. + * If there are no such special tokens, this field is set to null. + * When there is more than one such special token, this field refers + * to the last of these special tokens, which in turn refers to the next + * previous special token through its specialToken field, and so on + * until the first special token (whose specialToken field is null). + * The next fields of special tokens refer to other special tokens that + * immediately follow it (without an intervening regular token). If there + * is no such token, this field is null. + */ + public Token specialToken; + + /** + * Returns the image. + */ + public String toString() + { + return image; + } + + /** + * Returns a new Token object, by default. However, if you want, you + * can create and return subclass objects based on the value of ofKind. + * Simply add the cases to the switch for all those special cases. + * For example, if you have a subclass of Token called IDToken that + * you want to create if ofKind is ID, simply add something like : + * + * case MyParserConstants.ID : return new IDToken(); + * + * to the following switch statement. Then you can cast matchedToken + * variable to the appropriate type and use it in your lexical actions. 
+ */ + public static final Token newToken(int ofKind) + { + switch(ofKind) + { + default : return new Token(); + } + } + +} Added: lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java?view=auto&rev=156431 ============================================================================== --- lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java (added) +++ lucene/java/trunk/src/java/org/apache/lucene/queryParser/precedence/TokenMgrError.java Mon Mar 7 08:08:02 2005 @@ -0,0 +1,133 @@ +/* Generated By:JavaCC: Do not edit this line. TokenMgrError.java Version 3.0 */ +package org.apache.lucene.queryParser.precedence; + +public class TokenMgrError extends Error +{ + /* + * Ordinals for various reasons why an Error of this type can be thrown. + */ + + /** + * Lexical error occurred. + */ + static final int LEXICAL_ERROR = 0; + + /** + * An attempt was made to create a second instance of a static token manager. + */ + static final int STATIC_LEXER_ERROR = 1; + + /** + * Tried to change to an invalid lexical state. + */ + static final int INVALID_LEXICAL_STATE = 2; + + /** + * Detected (and bailed out of) an infinite loop in the token manager. + */ + static final int LOOP_DETECTED = 3; + + /** + * Indicates the reason why the exception is thrown. It will have + * one of the above 4 values. 
+ */ + int errorCode; + + /** + * Replaces unprintable characters by their escaped (or unicode escaped) + * equivalents in the given string + */ + protected static final String addEscapes(String str) { + StringBuffer retval = new StringBuffer(); + char ch; + for (int i = 0; i < str.length(); i++) { + switch (str.charAt(i)) + { + case 0 : + continue; + case '\b': + retval.append("\\b"); + continue; + case '\t': + retval.append("\\t"); + continue; + case '\n': + retval.append("\\n"); + continue; + case '\f': + retval.append("\\f"); + continue; + case '\r': + retval.append("\\r"); + continue; + case '\"': + retval.append("\\\""); + continue; + case '\'': + retval.append("\\\'"); + continue; + case '\\': + retval.append("\\\\"); + continue; + default: + if ((ch = str.charAt(i)) < 0x20 || ch > 0x7e) { + String s = "0000" + Integer.toString(ch, 16); + retval.append("\\u" + s.substring(s.length() - 4, s.length())); + } else { + retval.append(ch); + } + continue; + } + } + return retval.toString(); + } + + /** + * Returns a detailed message for the Error when it is thrown by the + * token manager to indicate a lexical error. + * Parameters : + * EOFSeen : indicates if EOF caused the lexical error + * curLexState : lexical state in which this error occurred + * errorLine : line number when the error occurred + * errorColumn : column number when the error occurred + * errorAfter : prefix that was seen before this error occurred + * curchar : the offending character + * Note: You can customize the lexical error message by modifying this method. + */ + protected static String LexicalError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar) { + return("Lexical error at line " + + errorLine + ", column " + + errorColumn + ". Encountered: " + + (EOFSeen ? 
"<EOF> " : ("\"" + addEscapes(String.valueOf(curChar)) + "\"") + " (" + (int)curChar + "), ") + + "after : \"" + addEscapes(errorAfter) + "\""); + } + + /** + * You can also modify the body of this method to customize your error messages. + * For example, cases like LOOP_DETECTED and INVALID_LEXICAL_STATE are not + * of end-users concern, so you can return something like : + * + * "Internal Error : Please file a bug report .... " + * + * from this method for such cases in the release version of your parser. + */ + public String getMessage() { + return super.getMessage(); + } + + /* + * Constructors of various flavors follow. + */ + + public TokenMgrError() { + } + + public TokenMgrError(String message, int reason) { + super(message); + errorCode = reason; + } + + public TokenMgrError(boolean EOFSeen, int lexState, int errorLine, int errorColumn, String errorAfter, char curChar, int reason) { + this(LexicalError(EOFSeen, lexState, errorLine, errorColumn, errorAfter, curChar), reason); + } +} Modified: lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java?view=diff&r1=156430&r2=156431 ============================================================================== --- lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java (original) +++ lucene/java/trunk/src/test/org/apache/lucene/queryParser/TestQueryParser.java Mon Mar 7 08:08:02 2005 @@ -522,6 +522,16 @@ } } + /** + * This test differs from TestPrecedenceQueryParser + */ + public void testPrecedence() throws Exception { + Query query1 = QueryParser.parse("A AND B OR C AND D", "field", new WhitespaceAnalyzer()); + Query query2 = QueryParser.parse("+A +B +C +D", "field", new WhitespaceAnalyzer()); + assertEquals(query1, query2); + } + + public void tearDown() { BooleanQuery.setMaxClauseCount(originalMaxClauses); }
