On Sun, Mar 24, 2013 at 10:46 AM, Wei Wang <[email protected]> wrote:
Hi,
> For example, assume we have fields F1 and F2, we would like to find
> all documents with condition F1+F2 > 5.0. This filter may be combined
> with other filters to form a BooleanFilter.
>
> The question is, is there any way to construct an efficient filter to do this?
I don't know - but the API looked interesting, so I gave it a try (see
below). I had never worked with search filters before writing that
code, so please proceed with caution, as I am not sure of many things
(iteration of all documents, treatment of deleted documents, what is
that "acceptDocs" variable, what threading constraints to respect...).
---
// add your package declaration
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.TreeSet;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.IntField;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.FieldCache;
import org.apache.lucene.search.FieldCache.Ints;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.Version;
import org.junit.Before;
import org.junit.Test;
/**
 * Demonstrates a custom Lucene {@link Filter} that matches documents whose
 * two numeric fields satisfy {@code alpha + omega > SUM_THRESHOLD}.
 *
 * <p>NOTE(review): relies on Lucene 4.2 APIs ({@code AtomicReader},
 * {@code FieldCache}); confirm against the project's Lucene version.
 */
public class FilterTest {

    private static final Version VERSION = Version.LUCENE_42;
    private static final String FIELD_ID = "id";
    private static final String FIELD_ALPHA = "alpha";
    private static final String FIELD_OMEGA = "omega";
    private static final int SUM_THRESHOLD = 5;
    private static final int[] VALUES_ALPHA = new int[] { 1, 2, 3, 4, 5 };
    private static final int[] VALUES_OMEGA = new int[] { 5, 0, 5, 0, 5 };
    // Docs 0, 2, 4 are the ones with alpha + omega > 5 given the arrays above.
    private static final Set<Integer> EXPECTED_MATCHED_DOCUMENT_IDS =
            new HashSet<Integer>(Arrays.asList(0, 2, 4));

    private Directory directory;

    /**
     * Builds an in-memory index with one document per (alpha, omega) pair,
     * storing the array index as the document's "id" field.
     */
    @Before
    public void setUp() throws IOException {
        directory = new RAMDirectory();
        Analyzer analyzer = new StandardAnalyzer(VERSION);
        IndexWriterConfig config = new IndexWriterConfig(VERSION, analyzer);
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        IndexWriter writer = new IndexWriter(directory, config);
        try {
            for (int ii = 0; ii < VALUES_ALPHA.length; ii++) {
                Document doc = new Document();
                doc.add(new IntField(FIELD_ID, ii, IntField.Store.YES));
                doc.add(new IntField(FIELD_ALPHA, VALUES_ALPHA[ii], IntField.Store.YES));
                doc.add(new IntField(FIELD_OMEGA, VALUES_OMEGA[ii], IntField.Store.YES));
                writer.addDocument(doc);
            }
        } finally {
            // BUGFIX: original leaked the writer if addDocument threw.
            writer.close();
        }
    }

    /**
     * Verifies that {@link SumFilter} keeps exactly the documents with
     * {@code alpha + omega > SUM_THRESHOLD}.
     */
    @Test
    public void testSumFilter() throws IOException {
        IndexReader reader = DirectoryReader.open(directory);
        try {
            // BUGFIX: search() moved inside the try — the original leaked the
            // reader if the search itself threw.
            IndexSearcher searcher = new IndexSearcher(reader);
            TopDocs results = searcher.search(new MatchAllDocsQuery(),
                    new SumFilter(SUM_THRESHOLD), VALUES_ALPHA.length);
            assertEquals(EXPECTED_MATCHED_DOCUMENT_IDS.size(), results.totalHits);
            for (int ii = 0; ii < results.scoreDocs.length; ii++) {
                int docId = results.scoreDocs[ii].doc;
                Document doc = reader.document(docId);
                int idValue = doc.getField(FIELD_ID).numericValue().intValue();
                int alphaValue = doc.getField(FIELD_ALPHA).numericValue().intValue();
                int omegaValue = doc.getField(FIELD_OMEGA).numericValue().intValue();
                assertTrue(EXPECTED_MATCHED_DOCUMENT_IDS.contains(idValue));
                assertTrue(alphaValue + omegaValue > SUM_THRESHOLD);
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Filter accepting documents whose alpha + omega field values exceed
     * {@code minValue}. Values are read through the {@link FieldCache}.
     *
     * <p>Static: it needs no enclosing-instance state (only static constants).
     */
    private static class SumFilter extends Filter {

        private final int minValue;

        public SumFilter(int minValue) {
            this.minValue = minValue;
        }

        @Override
        public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs)
                throws IOException {
            AtomicReader reader = context.reader();
            Ints alphaCache = FieldCache.DEFAULT.getInts(reader, FIELD_ALPHA, false);
            Ints omegaCache = FieldCache.DEFAULT.getInts(reader, FIELD_OMEGA, false);
            SimpleDocIdSet docIdSet = new SimpleDocIdSet();
            int maxDoc = reader.maxDoc();
            for (int docId = 0; docId < maxDoc; docId++) {
                // BUGFIX: honor acceptDocs so deleted / previously-filtered
                // documents are skipped. A null Bits means "all docs are live".
                if (acceptDocs != null && !acceptDocs.get(docId)) {
                    continue;
                }
                if (alphaCache.get(docId) + omegaCache.get(docId) > minValue) {
                    docIdSet.add(docId);
                }
            }
            return docIdSet;
        }
    }

    /**
     * Minimal {@link DocIdSet} backed by a sorted set of matching doc ids.
     * Iteration order is ascending, as the DocIdSetIterator contract requires.
     */
    private static class SimpleDocIdSet extends DocIdSet {

        private final TreeSet<Integer> sortedDocIds = new TreeSet<Integer>();

        public void add(int docId) {
            sortedDocIds.add(docId);
        }

        @Override
        public DocIdSetIterator iterator() throws IOException {
            return new DocIdSetIterator() {

                private final Iterator<Integer> it = sortedDocIds.iterator();
                // -1 until nextDoc()/advance() is first called, per contract.
                private int currentDocId = -1;

                @Override
                public int docID() {
                    // BUGFIX: the original returned NO_MORE_DOCS as soon as the
                    // backing iterator was exhausted, even while currentDocId
                    // still held the last valid matched doc. nextDoc() now pins
                    // currentDocId to NO_MORE_DOCS on exhaustion, so simply
                    // returning it satisfies the contract.
                    return currentDocId;
                }

                @Override
                public int nextDoc() throws IOException {
                    currentDocId = it.hasNext() ? it.next() : NO_MORE_DOCS;
                    return currentDocId;
                }

                @Override
                public int advance(int target) throws IOException {
                    // Contract: advance to the first doc >= target; always
                    // moves at least one position (behavior for
                    // target <= docID() is undefined by the API).
                    do {
                        nextDoc();
                    } while (currentDocId < target);
                    return currentDocId;
                }

                // NOTE(review): some 4.x releases declare an abstract cost()
                // on DocIdSetIterator; deliberately no @Override so this
                // compiles whether or not the base class defines it.
                public long cost() {
                    return sortedDocIds.size();
                }
            };
        }
    }
}
---
Regards,
Yep.
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]