This is totally untested, but here is a rough sketch of what the scorer
might look like when the number of terms is large.

-Yonik


package org.apache.lucene.search;

import org.apache.lucene.index.TermEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.TermDocs;

import java.io.IOException;

/**
 * Scorer that eagerly accumulates, for every document in the index, the sum of
 * the per-term scores of all terms produced by a {@link TermEnum}.
 *
 * <p>All work is done in the constructor: one float per document
 * (<code>reader.maxDoc()</code> entries) is allocated and filled in. Iteration
 * afterwards is a linear scan over that array that stops on every non-zero
 * entry, so a document whose accumulated score is exactly zero is
 * indistinguishable from a document that matched no term and is skipped.
 *
 * @author yonik
 * @version $Id$
 */
public class MultiTermScorer extends Scorer {
  /** Accumulated score per document id; 0 means "not reported by this scorer". */
  protected final float[] scores;
  /** Current document id, or -1 before the first call to {@link #next()}. */
  protected int pos;
  // Not read or written anywhere in this class.
  protected float docScore;

  /**
   * Builds the score table by walking every remaining term of <code>terms</code>
   * and every posting of each of those terms.
   *
   * @param similarity  passed to the superclass and used for <code>idf</code>
   *                    when <code>include_idf</code> is set
   * @param reader      supplies <code>maxDoc()</code> and the postings
   * @param w           weight whose <code>getValue()</code> is the base score
   *                    of every term
   * @param terms       term enumeration; it is advanced with
   *                    <code>next()</code> before the first term is read, and
   *                    it is not closed here
   * @param norms       one encoded norm byte per document, or
   *                    <code>null</code> to skip norm scaling
   * @param include_idf multiply each term's score by
   *                    <code>similarity.idf(docFreq, maxDoc)</code>
   * @param include_tf  multiply each posting's score by the raw term frequency
   * @throws IOException if reading terms or postings fails
   */
  public MultiTermScorer(Similarity similarity, IndexReader reader,
                         Weight w, TermEnum terms, byte[] norms,
                         boolean include_idf, boolean include_tf)
      throws IOException {
    super(similarity);
    float weightVal = w.getValue();
    int maxDoc = reader.maxDoc();
    this.scores = new float[maxDoc];
    float[] normDecoder = Similarity.getNormDecoder();

    TermDocs tdocs = reader.termDocs();
    try {
      while (terms.next()) {
        tdocs.seek(terms);
        float termScore = weightVal;
        if (include_idf) {
          termScore *= similarity.idf(terms.docFreq(), maxDoc);
        }
        while (tdocs.next()) {
          int doc = tdocs.doc();
          float subscore = termScore;
          if (include_tf) {
            subscore *= tdocs.freq();
          }
          if (norms != null) {
            // The norm byte is looked up by document id and THEN masked to an
            // unsigned 0..255 index. Masking the document id instead would read
            // the wrong document's norm and could index the decoder table with
            // a negative (sign-extended) byte.
            subscore *= normDecoder[norms[doc] & 0xff];
          }
          scores[doc] += subscore;
        }
      }
    } finally {
      // Release the postings enumerator even if reading the index fails.
      tdocs.close();
    }

    pos = -1;
  }

  /**
   * Advances to the next document with a non-zero accumulated score.
   *
   * @return <code>true</code> if such a document exists, <code>false</code>
   *         once the end of the score table has been reached
   */
  // could also use a bitset to keep track of docs in the set...
  public boolean next() throws IOException {
    while (++pos < scores.length) {
      if (scores[pos] != 0) return true;
    }
    return false;
  }

  /** Returns the current document id (-1 before the first {@link #next()}). */
  public int doc() {
    return pos;
  }

  /** Returns the accumulated score of the current document. */
  public float score() throws IOException {
    return scores[pos];
  }

  /**
   * Positions the scorer on the first document with id &gt;=
   * <code>target</code> that has a non-zero score.
   *
   * @param target lowest acceptable document id; a negative value makes the
   *               scan index the score table out of bounds
   * @return <code>true</code> if such a document exists
   */
  public boolean skipTo(int target) throws IOException {
    pos = target - 1;
    return next();
  }

  /**
   * Explanations are not implemented for this scorer.
   *
   * @return always <code>null</code>
   */
  public Explanation explain(int doc) throws IOException {
    return null;
  }
}



-Yonik
Now hiring -- http://forms.cnet.com/slink?231706

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to