Hi,
I'd like to add the attached class to Lucene's core. It makes life easier
for people who need to add and delete documents from an index by hiding
all the IndexReader/IndexWriter stuff. Anybody needing full performance or
best performance with threads can still use IndexReader/IndexWriter
directly.
What do you think? If this gets accepted, it also needs a better name.
Regards
Daniel
--
http://www.danielnaber.de
package org.apache.lucene.index;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
* A class to delete and add documents to an index without the
* need to care about the implementation detail that adding is done
* via IndexWriter and deletion is done via IndexReader.
*
* <p>Note that you cannot create more than one <code>Index</code> object
* on the same directory at the same time.
*
* <p>Although an instance of this class can be used from more than one
* thread, you will not get good performance. Use IndexReader and IndexWriter
* directly for that (you will need to care about synchronization yourself
* then).
*
* <p>While you can freely mix calls to add() and delete() using this class,
* you should batch your calls for best performance. For example, if you
* want to update 20 documents, you should first delete all those documents,
* then add all the new documents.
*
* @author Daniel Naber
*/
public class Index {

    /** Writer used by add/optimize; <code>null</code> while a reader is active. */
    IndexWriter indexWriter = null;

    /** Reader used by the delete methods; <code>null</code> while a writer is active. */
    IndexReader indexReader = null;

    /** The directory holding the index; also used as the lock object for all operations. */
    Directory directory;

    /** Default analyzer handed to every IndexWriter this class creates. */
    Analyzer analyzer;

    /** <code>true</code> between a successful construction and a successful {@link #close()}. */
    boolean open = false;

    /**
     * Open an index with write access.
     *
     * @param directory the index directory
     * @param analyzer the analyzer to use for adding new documents
     * @param create <code>true</code> to create the index or overwrite the existing one;
     *  <code>false</code> to append to the existing index
     * @throws IOException if the underlying IndexWriter cannot be created
     */
    public Index(Directory directory, Analyzer analyzer, boolean create) throws IOException {
        init(directory, analyzer, create);
    }

    /**
     * Open an index with write access.
     *
     * @param dirName the index directory
     * @param analyzer the analyzer to use for adding new documents
     * @param create <code>true</code> to create the index or overwrite the existing one;
     *  <code>false</code> to append to the existing index
     * @throws IOException if the directory or the underlying IndexWriter cannot be created
     */
    public Index(String dirName, Analyzer analyzer, boolean create) throws IOException {
        Directory dir = FSDirectory.getDirectory(dirName, create);
        init(dir, analyzer, create);
    }

    /**
     * Open an index with write access.
     *
     * @param file the index directory
     * @param analyzer the analyzer to use for adding new documents
     * @param create <code>true</code> to create the index or overwrite the existing one;
     *  <code>false</code> to append to the existing index
     * @throws IOException if the directory or the underlying IndexWriter cannot be created
     */
    public Index(File file, Analyzer analyzer, boolean create) throws IOException {
        Directory dir = FSDirectory.getDirectory(file, create);
        init(dir, analyzer, create);
    }

    /**
     * Shared constructor logic: remembers directory and analyzer and starts
     * out in "writer" mode.
     */
    private void init(Directory directory, Analyzer analyzer, boolean create) throws IOException {
        this.directory = directory;
        synchronized (this.directory) {
            this.analyzer = analyzer;
            indexWriter = new IndexWriter(directory, analyzer, create);
            open = true;
        }
    }

    /**
     * Fails fast when this index has been closed. Callers must hold the
     * lock on {@link #directory}.
     *
     * @throws IllegalStateException if the index is closed
     */
    private void assureOpen() {
        if (!open) {
            throw new IllegalStateException("Index is closed");
        }
    }

    /**
     * Adds a document, switching to "writer" mode first if necessary.
     *
     * @param doc the document to add
     * @param docAnalyzer the analyzer to use for this document, or
     *  <code>null</code> to use the analyzer given to the constructor
     * @see IndexWriter#addDocument(Document, Analyzer)
     * @throws IllegalStateException if the index is closed
     */
    public void addDocument(Document doc, Analyzer docAnalyzer) throws IOException {
        synchronized (directory) {
            assureOpen();
            createIndexWriter();
            if (docAnalyzer != null) {
                indexWriter.addDocument(doc, docAnalyzer);
            } else {
                indexWriter.addDocument(doc);
            }
        }
    }

    /**
     * Adds a document using the analyzer given to the constructor.
     *
     * @see IndexWriter#addDocument(Document)
     * @throws IllegalStateException if the index is closed
     */
    public void addDocument(Document doc) throws IOException {
        addDocument(doc, null);
    }

    /**
     * Makes sure an IndexWriter is available, closing the IndexReader first
     * if one is active. Callers must hold the lock on {@link #directory}.
     */
    private void createIndexWriter() throws IOException {
        if (indexWriter == null) {
            if (indexReader != null) {
                indexReader.close();
                indexReader = null;
            }
            indexWriter = new IndexWriter(directory, analyzer, false);
        }
    }

    /**
     * Makes sure an IndexReader is available, closing the IndexWriter first
     * if one is active. Callers must hold the lock on {@link #directory}.
     */
    private void createIndexReader() throws IOException {
        if (indexReader == null) {
            if (indexWriter != null) {
                indexWriter.close();
                indexWriter = null;
            }
            indexReader = IndexReader.open(directory);
        }
    }

    /**
     * Deletes all documents containing the given term, switching to
     * "reader" mode first if necessary.
     *
     * @see IndexReader#delete(Term)
     * @throws IllegalStateException if the index is closed
     */
    public void delete(Term term) throws IOException {
        synchronized (directory) {
            assureOpen();
            createIndexReader();
            indexReader.delete(term);
        }
    }

    /**
     * Deletes the document with the given number, switching to "reader"
     * mode first if necessary.
     *
     * @see IndexReader#delete(int)
     * @throws IllegalStateException if the index is closed
     */
    public void delete(int docNum) throws IOException {
        synchronized (directory) {
            assureOpen();
            createIndexReader();
            indexReader.delete(docNum);
        }
    }

    /**
     * Returns the document count reported by whichever of writer or reader
     * is currently active.
     *
     * <p>NOTE(review): the writer path uses <code>docCount()</code> and the
     * reader path uses <code>numDocs()</code>; these may treat deleted
     * documents differently - confirm against the Lucene API docs.
     *
     * @see IndexWriter#docCount()
     * @throws IllegalStateException if the index is closed, or if neither a
     *  writer nor a reader is available because an earlier mode switch failed
     */
    public int docCount() {
        synchronized (directory) {
            assureOpen();
            if (indexWriter != null) {
                return indexWriter.docCount();
            }
            if (indexReader != null) {
                return indexReader.numDocs();
            }
            // Both handles are null only when an earlier switch threw after
            // closing one side and before opening the other.
            throw new IllegalStateException("Index has neither an open writer nor an open reader");
        }
    }

    /**
     * Optimizes the index, switching to "writer" mode first if necessary.
     *
     * @see IndexWriter#optimize()
     * @throws IllegalStateException if the index is closed
     */
    public void optimize() throws IOException {
        synchronized (directory) {
            assureOpen();
            createIndexWriter();
            indexWriter.optimize();
        }
    }

    /**
     * Close this index, writing all pending changes to disk.
     *
     * @throws IllegalStateException if the index has been closed before already
     */
    public void close() throws IOException {
        synchronized (directory) {
            if (!open) {
                throw new IllegalStateException("Index is closed already");
            }
            if (indexWriter != null) {
                indexWriter.close();
                indexWriter = null;
            } else if (indexReader != null) {
                // The reader may be null too if an earlier mode switch failed
                // half-way; in that case there is nothing left to close.
                indexReader.close();
                indexReader = null;
            }
            open = false;
        }
    }

    //TODO: implement from reader: isDeleted, hasDeletions
    //TODO: implement from writer: setXYZ...
}
package org.apache.lucene.index;
import java.io.IOException;
import java.util.Random;
import java.util.Stack;
import junit.framework.TestCase;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Exercises {@link Index} from two concurrent threads that randomly add,
 * delete and optimize, then checks that the final document count equals
 * the number of additions minus the number of deletions.
 */
public class TestIndex extends TestCase {

    /** Next document id to hand out; guarded by the lock on {@link #idStack}. */
    private int id = 0;

    /** Ids of documents that were added and not yet deleted; also the lock for {@link #id}. */
    private Stack idStack = new Stack();

    // TODO: test case is not reproducible despite pseudo-random numbers
    // used for anything (thread scheduling decides who draws which number):
    private Random random = new Random(101);    // constant seed for reproducibility

    /** Runs the scenario without pauses and with two different maximum pauses. */
    public void testIndex() throws IOException {
        testIndexInternal(0);
        testIndexInternal(10);
        testIndexInternal(50);
    }

    /**
     * Runs two indexing threads against one freshly created index and
     * verifies the resulting document count.
     *
     * @param maxWait upper bound (exclusive) in milliseconds for the random
     *  pause after each operation; 0 disables pausing
     */
    private void testIndexInternal(int maxWait) throws IOException {
        boolean create = true;
        // Use the system temp dir instead of a hard-coded Unix path.
        java.io.File indexDir =
            new java.io.File(System.getProperty("java.io.tmpdir"), "testindex");
        Directory rd = FSDirectory.getDirectory(indexDir, create);
        Index index = new Index(rd, new StandardAnalyzer(), create);

        // Reset the shared bookkeeping exactly once, before any thread runs.
        synchronized (idStack) {
            id = 0;
            idStack.clear();
        }

        System.out.println("START");
        // Construct both workers before starting either one.
        IndexThread thread1 = new IndexThread(index, maxWait);
        IndexThread thread2 = new IndexThread(index, maxWait);
        thread1.start();
        thread2.start();
        waitFor(thread1);
        waitFor(thread2);

        // Surface anything that went wrong inside the workers.
        if (thread1.failure != null || thread2.failure != null) {
            index.close();
            rethrow(thread1.failure);
            rethrow(thread2.failure);
        }

        System.out.println("\nfinal optimize....");
        index.optimize();
        System.out.println("index size=" + index.docCount());
        int added = thread1.added + thread2.added;
        System.out.println("added=" + added);
        int deleted = thread1.deleted + thread2.deleted;
        System.out.println("deleted=" + deleted);
        System.out.println("expected index size=" + (added - deleted));
        assertEquals(added - deleted, index.docCount());

        index.close();
        try {
            index.close();
            fail();
        } catch (IllegalStateException e) {
            // expected exception
        }
    }

    /** Blocks until the given thread has finished; fails the test if interrupted. */
    private void waitFor(Thread thread) {
        try {
            thread.join();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            fail("interrupted while waiting for " + thread);
        }
    }

    /** Re-throws an exception recorded by a worker thread; does nothing for <code>null</code>. */
    private void rethrow(Exception failure) throws IOException {
        if (failure instanceof IOException) {
            throw (IOException) failure;
        }
        if (failure instanceof RuntimeException) {
            throw (RuntimeException) failure;
        }
    }

    /** Hands out the next unused document id. */
    private int nextId() {
        synchronized (idStack) {
            return id++;
        }
    }

    /**
     * Atomically removes and returns the most recently recorded id.
     *
     * @return the id, or <code>null</code> if no id is currently recorded
     */
    private String popId() {
        synchronized (idStack) {
            if (idStack.isEmpty()) {
                return null;
            }
            return (String) idStack.pop();
        }
    }

    /** Worker that performs a fixed number of random index operations. */
    private class IndexThread extends Thread {

        private final int ITERATIONS = 250;
        private int maxWait = 10;
        private Index index;
        private int added = 0;
        private int deleted = 0;

        /** First exception that stopped this worker, or <code>null</code>; read after join(). */
        private Exception failure = null;

        IndexThread(Index index, int maxWait) {
            this.index = index;
            this.maxWait = maxWait;
        }

        public void run() {
            try {
                for (int i = 0; i < ITERATIONS; i++) {
                    if (random.nextInt(101) < 5) {
                        System.out.println("--- optimize... ---");
                        index.optimize();
                    } else if (random.nextInt(101) < 70) {
                        Document doc = getDocument();
                        System.out.println("add doc id=" + doc.get("id"));
                        index.addDocument(doc);
                        // Publish the id only after the document is in the index.
                        idStack.push(doc.get("id"));
                        added++;
                    } else {
                        // We just delete the last document added and remove it
                        // from the id stack so that it won't be removed twice.
                        // If nothing is recorded, wait for the next chance.
                        String delId = popId();
                        if (delId != null) {
                            System.out.println("delete doc id = " + delId);
                            index.delete(new Term("id", delId));
                            deleted++;
                        }
                    }
                    if (maxWait > 0) {
                        try {
                            int rand = random.nextInt(maxWait);
                            System.out.println("waiting " + rand + "ms");
                            Thread.sleep(rand);
                        } catch (InterruptedException e) {
                            // Preserve the interrupt and stop working.
                            Thread.currentThread().interrupt();
                            return;
                        }
                    }
                }
            } catch (IOException e) {
                failure = e;
            } catch (RuntimeException e) {
                failure = e;
            }
        }

        /** Builds a document with a unique "id" field and two random "content" fields. */
        private Document getDocument() {
            Document doc = new Document();
            doc.add(new Field("id", Integer.toString(nextId()), Field.Store.YES,
                Field.Index.UN_TOKENIZED));
            // add random stuff:
            doc.add(new Field("content", Integer.toString(random.nextInt(1000)), Field.Store.YES,
                Field.Index.TOKENIZED));
            doc.add(new Field("content", Integer.toString(random.nextInt(1000)), Field.Store.YES,
                Field.Index.TOKENIZED));
            return doc;
        }
    }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]