Hello,
I'm trying to understand why i'm getting duplicated results in the attached
java code.
Debugging the code it seems that they come from differents segments.
Increasing the RAMBufferSizeMB to a level that all the docs are in the same
segment seems to return only unique numbers.
There is any way to get unique documents in a bulk query without having to
cache then in a memory structure?
Thanks in advance!!
package com.king.lucifer.lucene;
import java.io.IOException;
import java.nio.file.Paths;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.MMapDirectory;
public class NumbersExample {
public static void main(String[] args) throws IOException {
new NumbersExample().run();
}
private void run() throws IOException {
FSDirectory dir = MMapDirectory.open(Paths.get("luceneTest"));
IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
IndexWriter indexWriter = new IndexWriter(dir, config);
SearcherManager searcherManager = new SearcherManager(indexWriter, null);
long baseId = 9906843115L;
int numberOfUniqueIds = 1_000_000;
String ID_FIELD_NAME = "id";
for (long id = 0; id < numberOfUniqueIds; id++) {
String finalId = String.valueOf(baseId + id);
Term term = new Term(ID_FIELD_NAME, finalId);
Document document = new Document();
document.add(new StringField(ID_FIELD_NAME, finalId, Field.Store.YES));
indexWriter.updateDocument(term, document);
}
searcherManager.maybeRefreshBlocking();
IndexSearcher indexSearcher = searcherManager.acquire();
Query luceneQuery = new MatchAllDocsQuery();
Set<String> ids = new HashSet<>();
indexSearcher.search(luceneQuery, new SimpleCollector() {
@Override
public void collect(final int doc) throws IOException {
ids.add(indexSearcher.doc(doc).getField(ID_FIELD_NAME).stringValue());
}
@Override
public ScoreMode scoreMode() {
return ScoreMode.TOP_SCORES;
}
});
System.out.println(ids.size());
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]