goller 2005/01/24 11:21:01 Modified: src/java/org/apache/lucene/search ConjunctionScorer.java BooleanQuery.java src/test/org/apache/lucene/search CheckHits.java Added: src/java/org/apache/lucene/search ReqOptSumScorer.java BooleanScorer2.java DisjunctionSumScorer.java NonMatchingScorer.java ReqExclScorer.java src/test/org/apache/lucene/search TestBoolean2.java Log: New BooleanScorer implemented by Paul Elschot (Patch 31785) that implements skipTo and delivers documents in correct order. Furthermore a small bug in ConjunctionScorer skipTo (if called without a preceding next) is eliminated. Revision Changes Path 1.7 +25 -14 jakarta-lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java Index: ConjunctionScorer.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/ConjunctionScorer.java,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- ConjunctionScorer.java 6 Sep 2004 12:07:04 -0000 1.6 +++ ConjunctionScorer.java 24 Jan 2005 19:21:01 -0000 1.7 @@ -17,10 +17,13 @@ */ import java.io.IOException; -import java.util.*; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Iterator; +import java.util.LinkedList; /** Scorer for conjunctions, sets of queries, all of which are required. 
*/ -final class ConjunctionScorer extends Scorer { +class ConjunctionScorer extends Scorer { private LinkedList scorers = new LinkedList(); private boolean firstTime = true; private boolean more = true; @@ -41,7 +44,7 @@ public boolean next() throws IOException { if (firstTime) { - init(); + init(true); } else if (more) { more = last().next(); // trigger further scanning } @@ -57,12 +60,18 @@ } public boolean skipTo(int target) throws IOException { + if(firstTime) { + init(false); + } + Iterator i = scorers.iterator(); while (more && i.hasNext()) { more = ((Scorer)i.next()).skipTo(target); } + if (more) sortScorers(); // re-sort scorers + return doNext(); } @@ -74,20 +83,22 @@ score *= coord; return score; } - - private void init() throws IOException { - more = scorers.size() > 0; - - // compute coord factor + + private void init(boolean initScorers) throws IOException { + // compute coord factor coord = getSimilarity().coord(scorers.size(), scorers.size()); + + more = scorers.size() > 0; - // move each scorer to its first entry - Iterator i = scorers.iterator(); - while (more && i.hasNext()) { - more = ((Scorer)i.next()).next(); + if(initScorers){ + // move each scorer to its first entry + Iterator i = scorers.iterator(); + while (more && i.hasNext()) { + more = ((Scorer)i.next()).next(); + } + if (more) + sortScorers(); // initial sort of list } - if (more) - sortScorers(); // initial sort of list firstTime = false; } 1.29 +43 -5 jakarta-lucene/src/java/org/apache/lucene/search/BooleanQuery.java Index: BooleanQuery.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/search/BooleanQuery.java,v retrieving revision 1.28 retrieving revision 1.29 diff -u -r1.28 -r1.29 --- BooleanQuery.java 14 Dec 2004 19:28:44 -0000 1.28 +++ BooleanQuery.java 24 Jan 2005 19:21:01 -0000 1.29 @@ -1,7 +1,7 @@ package org.apache.lucene.search; /** - * Copyright 2004 The Apache Software Foundation + * 
Copyright 2004-2005 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,7 +21,7 @@ import org.apache.lucene.index.IndexReader; /** A Query that matches documents matching boolean combinations of other - queries, typically {@link TermQuery}s or {@link PhraseQuery}s. + * queries, typically {@link TermQuery}s or {@link PhraseQuery}s. */ public class BooleanQuery extends Query { @@ -117,8 +117,8 @@ } private class BooleanWeight implements Weight { - private Searcher searcher; - private Vector weights = new Vector(); + protected Searcher searcher; + protected Vector weights = new Vector(); public BooleanWeight(Searcher searcher) { this.searcher = searcher; @@ -126,6 +126,7 @@ BooleanClause c = (BooleanClause)clauses.elementAt(i); weights.add(c.getQuery().createWeight(searcher)); } + //System.out.println("Creating " + getClass().getName()); } public Query getQuery() { return BooleanQuery.this; } @@ -156,6 +157,7 @@ } } + /** @return A good old 1.4 Scorer */ public Scorer scorer(IndexReader reader) throws IOException { // First see if the (faster) ConjunctionScorer will work. This can be // used when all clauses are required. Also, at this point a @@ -246,8 +248,44 @@ } } + private class BooleanWeight2 extends BooleanWeight { + /* Merge into BooleanWeight in case the 1.4 BooleanScorer is dropped */ + public BooleanWeight2(Searcher searcher) { super(searcher); } + + /** @return An alternative Scorer that uses and provides skipTo(), + * and scores documents in document number order. 
+ */ + public Scorer scorer(IndexReader reader) throws IOException { + BooleanScorer2 result = new BooleanScorer2(getSimilarity(searcher)); + + for (int i = 0 ; i < weights.size(); i++) { + BooleanClause c = (BooleanClause)clauses.elementAt(i); + Weight w = (Weight)weights.elementAt(i); + Scorer subScorer = w.scorer(reader); + if (subScorer != null) + result.add(subScorer, c.isRequired(), c.isProhibited()); + else if (c.isRequired()) + return null; + } + + return result; + } + } + + /** Indicates whether to use good old 1.4 BooleanScorer. */ + private static boolean useScorer14 = false; + + public static void setUseScorer14(boolean use14) { + useScorer14 = use14; + } + + public static boolean getUseScorer14() { + return useScorer14; + } + protected Weight createWeight(Searcher searcher) { - return new BooleanWeight(searcher); + return getUseScorer14() ? (Weight) new BooleanWeight(searcher) + : (Weight) new BooleanWeight2(searcher); } public Query rewrite(IndexReader reader) throws IOException { 1.1 jakarta-lucene/src/java/org/apache/lucene/search/ReqOptSumScorer.java Index: ReqOptSumScorer.java =================================================================== package org.apache.lucene.search; /** * Copyright 2005 Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; /** A Scorer for queries with a required part and an optional part. * Delays skipTo() on the optional part until a score() is needed. 
* <br> * This <code>Scorer</code> implements [EMAIL PROTECTED] Scorer#skipTo(int)}. */ public class ReqOptSumScorer extends Scorer { /** The scorers passed from the constructor. * These are set to null as soon as their next() or skipTo() returns false. */ private Scorer reqScorer; private Scorer optScorer; /** Construct a <code>ReqOptScorer</code>. * @param reqScorer The required scorer. This must match. * @param optScorer The optional scorer. This is used for scoring only. */ public ReqOptSumScorer( Scorer reqScorer, Scorer optScorer) { super(null); // No similarity used. this.reqScorer = reqScorer; this.optScorer = optScorer; } private boolean firstTimeOptScorer = true; public boolean next() throws IOException { return reqScorer.next(); } public boolean skipTo(int target) throws IOException { return reqScorer.skipTo(target); } public int doc() { return reqScorer.doc(); } /** Returns the score of the current document matching the query. * Initially invalid, until [EMAIL PROTECTED] #next()} is called the first time. * @return The score of the required scorer, eventually increased by the score * of the optional scorer when it also matches the current document. */ public float score() throws IOException { int curDoc = reqScorer.doc(); float reqScore = reqScorer.score(); if (firstTimeOptScorer) { firstTimeOptScorer = false; if (! optScorer.skipTo(curDoc)) { optScorer = null; return reqScore; } } else if (optScorer == null) { return reqScore; } else if ((optScorer.doc() < curDoc) && (! optScorer.skipTo(curDoc))) { optScorer = null; return reqScore; } // assert (optScorer != null) && (optScorer.doc() >= curDoc); return (optScorer.doc() == curDoc) ? reqScore + optScorer.score() : reqScore; } /** Explain the score of a document. * @todo Also show the total score. * See BooleanScorer.explain() on how to do this. 
*/ public Explanation explain(int doc) throws IOException { Explanation res = new Explanation(); res.setDescription("required, optional"); res.addDetail(reqScorer.explain(doc)); res.addDetail(optScorer.explain(doc)); return res; } } 1.1 jakarta-lucene/src/java/org/apache/lucene/search/BooleanScorer2.java Index: BooleanScorer2.java =================================================================== package org.apache.lucene.search; /** * Copyright 2005 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.Iterator; /** An alternative to BooleanScorer. * <br>Uses ConjunctionScorer, DisjunctionScorer, ReqOptScorer and ReqExclScorer. * <br>Implements skipTo(), and has no limitations on the numbers of added scorers. */ public class BooleanScorer2 extends Scorer { private ArrayList requiredScorers = new ArrayList(); private ArrayList optionalScorers = new ArrayList(); private ArrayList prohibitedScorers = new ArrayList(); private class Coordinator { int maxCoord = 0; // to be increased for each non prohibited scorer private float[] coordFactors = null; void init() { // use after all scorers have been added. coordFactors = new float[maxCoord + 1]; Similarity sim = getSimilarity(); for (int i = 0; i <= maxCoord; i++) { coordFactors[i] = sim.coord(i, maxCoord); } } int nrMatchers; // to be increased by score() of match counting scorers. 
void initDoc() { nrMatchers = 0; } float coordFactor() { return coordFactors[nrMatchers]; } } private final Coordinator coordinator; /** The scorer to which all scoring will be delegated, * except for computing and using the coordination factor. */ private Scorer countingSumScorer = null; public BooleanScorer2(Similarity similarity) { super(similarity); coordinator = new Coordinator(); } public void add(final Scorer scorer, boolean required, boolean prohibited) { if (!prohibited) { coordinator.maxCoord++; } if (required) { if (prohibited) { throw new IllegalArgumentException("scorer cannot be required and prohibited"); } requiredScorers.add(scorer); } else if (prohibited) { prohibitedScorers.add(scorer); } else { optionalScorers.add(scorer); } } /** Initialize the match counting scorer that sums all the * scores. <p> * When "counting" is used in a name it means counting the number * of matching scorers.<br> * When "sum" is used in a name it means score value summing * over the matching scorers */ private void initCountingSumScorer() { coordinator.init(); countingSumScorer = makeCountingSumScorer(); } /** Count a scorer as a single match. 
*/ private class SingleMatchScorer extends Scorer { private Scorer scorer; SingleMatchScorer(Scorer scorer) { super(scorer.getSimilarity()); this.scorer = scorer; } public float score() throws IOException { coordinator.nrMatchers++; return scorer.score(); } public int doc() { return scorer.doc(); } public boolean next() throws IOException { return scorer.next(); } public boolean skipTo(int docNr) throws IOException { return scorer.skipTo(docNr); } public Explanation explain(int docNr) throws IOException { return scorer.explain(docNr); } } private Scorer countingDisjunctionSumScorer(List scorers) // each scorer from the list counted as a single matcher { return new DisjunctionSumScorer(scorers) { public float score() throws IOException { coordinator.nrMatchers += nrMatchers; return super.score(); } }; } private static Similarity defaultSimilarity = new DefaultSimilarity(); private Scorer countingConjunctionSumScorer(List requiredScorers) // each scorer from the list counted as a single matcher { final int requiredNrMatchers = requiredScorers.size(); ConjunctionScorer cs = new ConjunctionScorer(defaultSimilarity) { public float score() throws IOException { coordinator.nrMatchers += requiredNrMatchers; // All scorers match, so defaultSimilarity super.score() always has 1 as // the coordination factor. // Therefore the sum of the scores of the requiredScorers // is used as score. return super.score(); } }; Iterator rsi = requiredScorers.iterator(); while (rsi.hasNext()) { cs.add((Scorer) rsi.next()); } return cs; } /** Returns the scorer to be used for match counting and score summing. * Uses requiredScorers, optionalScorers and prohibitedScorers. 
*/ private Scorer makeCountingSumScorer() // each scorer counted as a single matcher { if (requiredScorers.size() == 0) { if (optionalScorers.size() == 0) { return new NonMatchingScorer(); // only prohibited scorers } else if (optionalScorers.size() == 1) { return makeCountingSumScorer2( // the only optional scorer is required new SingleMatchScorer((Scorer) optionalScorers.get(0)), new ArrayList()); // no optional scorers left } else { // more than 1 optionalScorers, no required scorers return makeCountingSumScorer2( // at least one optional scorer is required countingDisjunctionSumScorer(optionalScorers), new ArrayList()); // no optional scorers left } } else if (requiredScorers.size() == 1) { // 1 required return makeCountingSumScorer2( new SingleMatchScorer((Scorer) requiredScorers.get(0)), optionalScorers); } else { // more required scorers return makeCountingSumScorer2( countingConjunctionSumScorer(requiredScorers), optionalScorers); } } /** Returns the scorer to be used for match counting and score summing. * Uses the arguments and prohibitedScorers. * @param requiredCountingSumScorer A required scorer already built. * @param optionalScorers A list of optional scorers, possibly empty. */ private Scorer makeCountingSumScorer2( Scorer requiredCountingSumScorer, List optionalScorers) // not match counting { if (optionalScorers.size() == 0) { // no optional if (prohibitedScorers.size() == 0) { // no prohibited return requiredCountingSumScorer; } else if (prohibitedScorers.size() == 1) { // no optional, 1 prohibited return new ReqExclScorer( requiredCountingSumScorer, (Scorer) prohibitedScorers.get(0)); // not match counting } else { // no optional, more prohibited return new ReqExclScorer( requiredCountingSumScorer, new DisjunctionSumScorer(prohibitedScorers)); // score unused. 
not match counting } } else if (optionalScorers.size() == 1) { // 1 optional return makeCountingSumScorer3( requiredCountingSumScorer, new SingleMatchScorer((Scorer) optionalScorers.get(0))); } else { // more optional return makeCountingSumScorer3( requiredCountingSumScorer, countingDisjunctionSumScorer(optionalScorers)); } } /** Returns the scorer to be used for match counting and score summing. * Uses the arguments and prohibitedScorers. * @param requiredCountingSumScorer A required scorer already built. * @param optionalCountingSumScorer An optional scorer already built. */ private Scorer makeCountingSumScorer3( Scorer requiredCountingSumScorer, Scorer optionalCountingSumScorer) { if (prohibitedScorers.size() == 0) { // no prohibited return new ReqOptSumScorer(requiredCountingSumScorer, optionalCountingSumScorer); } else if (prohibitedScorers.size() == 1) { // 1 prohibited return new ReqOptSumScorer( new ReqExclScorer(requiredCountingSumScorer, (Scorer) prohibitedScorers.get(0)), // not match counting optionalCountingSumScorer); } else { // more prohibited return new ReqOptSumScorer( new ReqExclScorer( requiredCountingSumScorer, new DisjunctionSumScorer(prohibitedScorers)), // score unused. not match counting optionalCountingSumScorer); } } /** Scores and collects all matching documents. * @param hc The collector to which all matching documents are passed through * [EMAIL PROTECTED] HitCollector#collect(int, float)}. * <br>When this method is used the [EMAIL PROTECTED] #explain(int)} method should not be used. */ public void score(HitCollector hc) throws IOException { if (countingSumScorer == null) { initCountingSumScorer(); } while (countingSumScorer.next()) { hc.collect(countingSumScorer.doc(), score()); } } /** Expert: Collects matching documents in a range. * <br>Note that [EMAIL PROTECTED] #next()} must be called once before this method is * called for the first time. 
* @param hc The collector to which all matching documents are passed through * [EMAIL PROTECTED] HitCollector#collect(int, float)}. * @param max Do not score documents past this. * @return true if more matching documents may remain. */ protected boolean score(HitCollector hc, int max) throws IOException { // null pointer exception when next() was not called before: int docNr = countingSumScorer.doc(); while (docNr < max) { hc.collect(docNr, score()); if (! countingSumScorer.next()) { return false; } docNr = countingSumScorer.doc(); } return true; } public int doc() { return countingSumScorer.doc(); } public boolean next() throws IOException { if (countingSumScorer == null) { initCountingSumScorer(); } return countingSumScorer.next(); } public float score() throws IOException { coordinator.initDoc(); float sum = countingSumScorer.score(); return sum * coordinator.coordFactor(); } public boolean skipTo(int target) throws IOException { if (countingSumScorer == null) { initCountingSumScorer(); } return countingSumScorer.skipTo(target); } public Explanation explain(int doc) throws IOException { throw new UnsupportedOperationException(); /* How to explain the coordination factor? initCountingSumScorer(); return countingSumScorer.explain(doc); // misses coord factor. */ } } 1.1 jakarta-lucene/src/java/org/apache/lucene/search/DisjunctionSumScorer.java Index: DisjunctionSumScorer.java =================================================================== package org.apache.lucene.search; /** * Copyright 2005 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
* See the License for the specific language governing permissions and * limitations under the License. */ import java.util.List; import java.util.Iterator; import java.io.IOException; import org.apache.lucene.util.PriorityQueue; /** A Scorer for OR like queries, counterpart of Lucene's <code>ConjunctionScorer</code>. * This Scorer implements [EMAIL PROTECTED] Scorer#skipTo(int)} and uses skipTo() on the given Scorers. */ public class DisjunctionSumScorer extends Scorer { /** The number of subscorers. */ private final int nrScorers; /** The subscorers. */ protected final List subScorers; /** The minimum number of scorers that should match. */ private final int minimumNrMatchers; /** The scorerQueue contains all subscorers ordered by their current doc(), * with the minimum at the top. * <br>The scorerQueue is initialized the first time next() or skipTo() is called. * <br>An exhausted scorer is immediately removed from the scorerQueue. * <br>If less than the minimumNrMatchers scorers * remain in the scorerQueue next() and skipTo() return false. * <p> * After each to call to next() or skipTo() * <code>currentSumScore</code> is the total score of the current matching doc, * <code>nrMatchers</code> is the number of matching scorers, * and all scorers are after the matching doc, or are exhausted. */ private ScorerQueue scorerQueue = null; /** The document number of the current match. */ private int currentDoc = -1; /** The number of subscorers that provide the current match. */ protected int nrMatchers = -1; private float currentScore = Float.NaN; /** Construct a <code>DisjunctionScorer</code>. * @param subScorers A collection of at least two subscorers. * @param minimumNrMatchers The positive minimum number of subscorers that should * match to match this query. * <br>When <code>minimumNrMatchers</code> is bigger than * the number of <code>subScorers</code>, * no matches will be produced. 
* <br>When minimumNrMatchers equals the number of subScorers, * it more efficient to use <code>ConjunctionScorer</code>. */ public DisjunctionSumScorer( List subScorers, int minimumNrMatchers) { super(null); nrScorers = subScorers.size(); if (minimumNrMatchers <= 0) { throw new IllegalArgumentException("Minimum nr of matchers must be positive"); } if (nrScorers <= 1) { throw new IllegalArgumentException("There must be at least 2 subScorers"); } this.minimumNrMatchers = minimumNrMatchers; this.subScorers = subScorers; } /** Construct a <code>DisjunctionScorer</code>, using one as the minimum number * of matching subscorers. */ public DisjunctionSumScorer(List subScorers) { this(subScorers, 1); } /** Called the first time next() or skipTo() is called to * initialize <code>scorerQueue</code>. */ private void initScorerQueue() throws IOException { Iterator si = subScorers.iterator(); scorerQueue = new ScorerQueue(nrScorers); while (si.hasNext()) { Scorer se = (Scorer) si.next(); if (se.next()) { // doc() method will be used in scorerQueue. scorerQueue.insert(se); } } } /** A <code>PriorityQueue</code> that orders by [EMAIL PROTECTED] Scorer#doc()}. */ private class ScorerQueue extends PriorityQueue { ScorerQueue(int size) { initialize(size); } protected boolean lessThan(Object o1, Object o2) { return ((Scorer)o1).doc() < ((Scorer)o2).doc(); } } public boolean next() throws IOException { if (scorerQueue == null) { initScorerQueue(); } if (scorerQueue.size() < minimumNrMatchers) { return false; } else { return advanceAfterCurrent(); } } /** Advance all subscorers after the current document determined by the * top of the <code>scorerQueue</code>. * Repeat until at least the minimum number of subscorers match on the same * document and all subscorers are after that document or are exhausted. * <br>On entry the <code>scorerQueue</code> has at least <code>minimumNrMatchers</code> * available. At least the scorer with the minimum document number will be advanced. 
* @return true iff there is a match. * <br>In case there is a match, </code>currentDoc</code>, </code>currentSumScore</code>, * and </code>nrMatchers</code> describe the match. * * @todo Investigate whether it is possible to use skipTo() when * the minimum number of matchers is bigger than one, ie. try and use the * character of ConjunctionScorer for the minimum number of matchers. */ protected boolean advanceAfterCurrent() throws IOException { do { // repeat until minimum nr of matchers Scorer top = (Scorer) scorerQueue.top(); currentDoc = top.doc(); currentScore = top.score(); nrMatchers = 1; do { // Until all subscorers are after currentDoc if (top.next()) { scorerQueue.adjustTop(); } else { scorerQueue.pop(); if (scorerQueue.size() < (minimumNrMatchers - nrMatchers)) { // Not enough subscorers left for a match on this document, // and also no more chance of any further match. return false; } if (scorerQueue.size() == 0) { break; // nothing more to advance, check for last match. } } top = (Scorer) scorerQueue.top(); if (top.doc() != currentDoc) { break; // All remaining subscorers are after currentDoc. } else { currentScore += top.score(); nrMatchers++; } } while (true); if (nrMatchers >= minimumNrMatchers) { return true; } else if (scorerQueue.size() < minimumNrMatchers) { return false; } } while (true); } /** Returns the score of the current document matching the query. * Initially invalid, until [EMAIL PROTECTED] #next()} is called the first time. */ public float score() throws IOException { return currentScore; } public int doc() { return currentDoc; } /** Returns the number of subscorers matching the current document. * Initially invalid, until [EMAIL PROTECTED] #next()} is called the first time. */ public int nrMatchers() { return nrMatchers; } /** Skips to the first match beyond the current whose document number is * greater than or equal to a given target. * <br>When this method is used the [EMAIL PROTECTED] #explain(int)} method should not be used. 
* <br>The implementation uses the skipTo() method on the subscorers. * @param target The target document number. * @return true iff there is such a match. */ public boolean skipTo(int target) throws IOException { if (scorerQueue == null) { initScorerQueue(); } if (scorerQueue.size() < minimumNrMatchers) { return false; } if (target <= currentDoc) { target = currentDoc + 1; } do { Scorer top = (Scorer) scorerQueue.top(); if (top.doc() >= target) { return advanceAfterCurrent(); } else if (top.skipTo(target)) { scorerQueue.adjustTop(); } else { scorerQueue.pop(); if (scorerQueue.size() < minimumNrMatchers) { return false; } } } while (true); } /** Gives and explanation for the score of a given document. * @todo Show the resulting score. See BooleanScorer.explain() on how to do this. */ public Explanation explain(int doc) throws IOException { Explanation res = new Explanation(); res.setDescription("At least " + minimumNrMatchers + " of"); Iterator ssi = subScorers.iterator(); while (ssi.hasNext()) { res.addDetail( ((Scorer) ssi.next()).explain(doc)); } return res; } } 1.1 jakarta-lucene/src/java/org/apache/lucene/search/NonMatchingScorer.java Index: NonMatchingScorer.java =================================================================== package org.apache.lucene.search; /** * Copyright 2005 Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; /** A scorer that matches no document at all. 
*/ class NonMatchingScorer extends Scorer { public NonMatchingScorer() { super(null); } // no similarity used public int doc() { throw new UnsupportedOperationException(); } public boolean next() throws IOException { return false; } public float score() { throw new UnsupportedOperationException(); } public boolean skipTo(int target) { return false; } public Explanation explain(int doc) { Explanation e = new Explanation(); e.setDescription("No document matches."); return e; } } 1.1 jakarta-lucene/src/java/org/apache/lucene/search/ReqExclScorer.java Index: ReqExclScorer.java =================================================================== package org.apache.lucene.search; /** * Copyright 2005 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; /** A Scorer for queries with a required subscorer and an excluding (prohibited) subscorer. * <br> * This <code>Scorer</code> implements [EMAIL PROTECTED] Scorer#skipTo(int)}, * and it uses the skipTo() on the given scorers. */ public class ReqExclScorer extends Scorer { private Scorer reqScorer, exclScorer; /** Construct a <code>ReqExclScorer</code>. * @param reqScorer The scorer that must match, except where * @param exclScorer indicates exclusion. */ public ReqExclScorer( Scorer reqScorer, Scorer exclScorer) { super(null); // No similarity used. 
this.reqScorer = reqScorer; this.exclScorer = exclScorer; } private boolean firstTime = true; public boolean next() throws IOException { if (firstTime) { if (! exclScorer.next()) { exclScorer = null; // exhausted at start } firstTime = false; } if (reqScorer == null) { return false; } if (! reqScorer.next()) { reqScorer = null; // exhausted, nothing left return false; } if (exclScorer == null) { return true; // reqScorer.next() already returned true } return toNonExcluded(); } /** Advance to non excluded doc. * <br>On entry: * <ul> * <li>reqScorer != null, * <li>exclScorer != null, * <li>reqScorer was advanced once via next() or skipTo() * and reqScorer.doc() may still be excluded. * </ul> * Advances reqScorer a non excluded required doc, if any. * @return true iff there is a non excluded required doc. */ private boolean toNonExcluded() throws IOException { int exclDoc = exclScorer.doc(); do { int reqDoc = reqScorer.doc(); // may be excluded if (reqDoc < exclDoc) { return true; // reqScorer advanced to before exclScorer, ie. not excluded } else if (reqDoc > exclDoc) { if (! exclScorer.skipTo(reqDoc)) { exclScorer = null; // exhausted, no more exclusions return true; } exclDoc = exclScorer.doc(); if (exclDoc > reqDoc) { return true; // not excluded } } } while (reqScorer.next()); reqScorer = null; // exhausted, nothing left return false; } public int doc() { return reqScorer.doc(); // reqScorer may be null when next() or skipTo() already return false } /** Returns the score of the current document matching the query. * Initially invalid, until [EMAIL PROTECTED] #next()} is called the first time. * @return The score of the required scorer. */ public float score() throws IOException { return reqScorer.score(); // reqScorer may be null when next() or skipTo() already return false } /** Skips to the first match beyond the current whose document number is * greater than or equal to a given target. 
* <br>When this method is used the [EMAIL PROTECTED] #explain(int)} method should not be used. * @param target The target document number. * @return true iff there is such a match. */ public boolean skipTo(int target) throws IOException { if (firstTime) { firstTime = false; if (! exclScorer.skipTo(target)) { exclScorer = null; // exhausted } } if (reqScorer == null) { return false; } if (exclScorer == null) { return reqScorer.skipTo(target); } if (! reqScorer.skipTo(target)) { reqScorer = null; return false; } return toNonExcluded(); } public Explanation explain(int doc) throws IOException { Explanation res = new Explanation(); if (exclScorer.skipTo(doc) && (exclScorer.doc() == doc)) { res.setDescription("excluded"); } else { res.setDescription("not excluded"); res.addDetail(reqScorer.explain(doc)); } return res; } } 1.3 +38 -9 jakarta-lucene/src/test/org/apache/lucene/search/CheckHits.java Index: CheckHits.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/test/org/apache/lucene/search/CheckHits.java,v retrieving revision 1.2 retrieving revision 1.3 diff -u -r1.2 -r1.3 --- CheckHits.java 10 Oct 2004 15:44:09 -0000 1.2 +++ CheckHits.java 24 Jan 2005 19:21:01 -0000 1.3 @@ -1,7 +1,7 @@ package org.apache.lucene.search; /** - * Copyright 2004 The Apache Software Foundation + * Copyright 2004-2005 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,13 +16,6 @@ * limitations under the License. */ -/* 20 May 2004: Factored out of spans tests. Please leave this comment - until this class is evt. also used by tests in search package. 
- */ - -import org.apache.lucene.search.Searcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Hits; import junit.framework.TestCase; import java.io.IOException; @@ -30,6 +23,8 @@ import java.util.TreeSet; public class CheckHits { + /** Tests that a query has expected document number results. + */ public static void checkHits( Query query, String defaultFieldName, @@ -49,7 +44,41 @@ actual.add(new Integer(hits.id(i))); } - TestCase.assertEquals(query.toString(defaultFieldName), correct, actual); + testCase.assertEquals(query.toString(defaultFieldName), correct, actual); + } + + /** Tests that a Hits has an expected order of documents */ + public static void checkDocIds(String mes, int[] results, Hits hits, TestCase testCase) + throws IOException { + testCase.assertEquals(mes + " nr of hits", results.length, hits.length()); + for (int i = 0; i < results.length; i++) { + testCase.assertEquals(mes + " doc nrs for hit " + i, results[i], hits.id(i)); + } + } + + /** Tests that two queries have an expected order of documents, + * and that the two queries have the same score values. 
+ */ + public static void checkHitsQuery( + Query query, + Hits hits1, + Hits hits2, + int[] results, + TestCase testCase) + throws IOException { + + checkDocIds("hits1", results, hits1, testCase); + checkDocIds("hits2", results, hits2, testCase); + + final float scoreTolerance = 1.0e-7f; + for (int i = 0; i < results.length; i++) { + if (Math.abs(hits1.score(i) - hits2.score(i)) > scoreTolerance) { + testCase.fail("Hit " + i + ", doc nrs " + hits1.id(i) + " and " + hits2.id(i) + + "\nunequal scores: " + hits1.score(i) + + "\n and: " + hits2.score(i) + + "\nfor query:" + query.toString()); + } + } } public static void printDocNrs(Hits hits) throws IOException { 1.1 jakarta-lucene/src/test/org/apache/lucene/search/TestBoolean2.java Index: TestBoolean2.java =================================================================== package org.apache.lucene.search; /** * Copyright 2005 Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.analysis.WhitespaceAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.queryParser.ParseException; import junit.framework.TestCase; /** Test BooleanQuery2 against BooleanQuery by overriding the standard query parser. * This also tests the scoring order of BooleanQuery. 
*/ public class TestBoolean2 extends TestCase { private IndexSearcher searcher; public static final String field = "field"; public void setUp() throws Exception { RAMDirectory directory = new RAMDirectory(); IndexWriter writer= new IndexWriter(directory, new WhitespaceAnalyzer(), true); for (int i = 0; i < docFields.length; i++) { Document doc = new Document(); doc.add(new Field(field, docFields[i], Field.Store.NO, Field.Index.TOKENIZED)); writer.addDocument(doc); } writer.close(); searcher = new IndexSearcher(directory); } private String[] docFields = { "w1 w2 w3 w4 w5", "w1 w3 w2 w3", "w1 xx w2 yy w3", "w1 w3 xx w2 yy w3" }; public Query makeQuery(String queryText) throws ParseException { return (new QueryParser(field, new WhitespaceAnalyzer())).parse(queryText); } public void queriesTest(String queryText, int[] expDocNrs) throws Exception { //System.out.println(); //System.out.println("Query: " + queryText); Query query1 = makeQuery(queryText); BooleanQuery.setUseScorer14(true); Hits hits1 = searcher.search(query1); Query query2 = makeQuery(queryText); // there should be no need to parse again... 
BooleanQuery.setUseScorer14(false); Hits hits2 = searcher.search(query2); CheckHits.checkHitsQuery(query2, hits1, hits2, expDocNrs, this); } public void testQueries01() throws Exception { String queryText = "+w3 +xx"; int[] expDocNrs = {2,3}; queriesTest(queryText, expDocNrs); } public void testQueries02() throws Exception { String queryText = "+w3 xx"; int[] expDocNrs = {2,3,1,0}; queriesTest(queryText, expDocNrs); } public void testQueries03() throws Exception { String queryText = "w3 xx"; int[] expDocNrs = {2,3,1,0}; queriesTest(queryText, expDocNrs); } public void testQueries04() throws Exception { String queryText = "w3 -xx"; int[] expDocNrs = {1,0}; queriesTest(queryText, expDocNrs); } public void testQueries05() throws Exception { String queryText = "+w3 -xx"; int[] expDocNrs = {1,0}; queriesTest(queryText, expDocNrs); } public void testQueries06() throws Exception { String queryText = "+w3 -xx -w5"; int[] expDocNrs = {1}; queriesTest(queryText, expDocNrs); } public void testQueries07() throws Exception { String queryText = "-w3 -xx -w5"; int[] expDocNrs = {}; queriesTest(queryText, expDocNrs); } public void testQueries08() throws Exception { String queryText = "+w3 xx -w5"; int[] expDocNrs = {2,3,1}; queriesTest(queryText, expDocNrs); } public void testQueries09() throws Exception { String queryText = "+w3 +xx +w2 zz"; int[] expDocNrs = {2, 3}; queriesTest(queryText, expDocNrs); } public void testQueries10() throws Exception { String queryText = "+w3 +xx +w2 zz"; int[] expDocNrs = {2, 3}; searcher.setSimilarity(new DefaultSimilarity(){ public float coord(int overlap, int maxOverlap) { return overlap / ((float)maxOverlap - 1); } }); queriesTest(queryText, expDocNrs); } }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]