Attached.
It uses subclasses and instanceof which is sort of "hackish" - to do it
correctly requires changes to the base classes.
-----Original Message-----
From: jason rutherglen [mailto:[EMAIL PROTECTED]
Sent: Monday, May 01, 2006 1:43 PM
To: [email protected]
Subject: Re: GData, updateable IndexSearcher
Can you post your code?
----- Original Message ----
From: Robert Engels <[EMAIL PROTECTED]>
To: [email protected]; jason rutherglen <[EMAIL PROTECTED]>
Sent: Monday, May 1, 2006 11:33:06 AM
Subject: RE: GData, updateable IndexSearcher
FYI, using my reopen() implementation (which rereads the deletions)
on a 135mb index, with 5000 iterations
open & close time using new reader = 585609
open & close time using reopen = 27422
Almost 20x faster. Important in a highly interactive/incremental updating
index.
-----Original Message-----
From: jason rutherglen [mailto:[EMAIL PROTECTED]
Sent: Monday, May 01, 2006 1:24 PM
To: [email protected]
Subject: Re: GData, updateable IndexSearcher
I wanted to post a quick hack to see if it is along the correct lines. A
few of the questions regard whether to reuse existing MultiReaders or
simply strip out only the SegmentReaders. I do a compare on the segment
name and made it public. Thanks!
/**
 * Builds a reader over the current segments of {@code indexReader}'s directory,
 * reusing the already-open SegmentReader for every segment that is still listed.
 *
 * <p>The segment list is re-read from the directory. If it contains exactly one
 * segment, a new SegmentReader for that segment is returned. Otherwise the result
 * is a MultiReader with one sub-reader per listed segment, in segment-list order:
 * the existing SegmentReader when one with the same segment name is found inside
 * {@code indexReader}, or a newly opened one when not.
 *
 * <p>Readers for segments that are no longer listed are left out of the result
 * (wrapping the whole old MultiReader, as an earlier version did, kept removed or
 * merged-away segments searchable). They are not closed here, and reused readers
 * are handed over unchanged.
 *
 * @param indexReader the reader to refresh; only MultiReader is supported
 * @return a reader over the segments currently listed in the directory
 * @throws IOException if the segment list or a new segment cannot be read
 * @throws RuntimeException if {@code indexReader} is not a MultiReader
 */
public static IndexReader reopen(IndexReader indexReader) throws IOException {
  if (!(indexReader instanceof MultiReader)) {
    throw new RuntimeException("indexReader not supported at this time");
  }
  MultiReader multiReader = (MultiReader) indexReader;

  SegmentInfos segmentInfos = new SegmentInfos();
  segmentInfos.read(indexReader.directory());
  if (segmentInfos.size() == 1) { // index is optimized
    return SegmentReader.get(segmentInfos, segmentInfos.info(0), false);
  }

  // Index the segment readers we already hold by segment name.
  Map<String, SegmentReader> existingSegmentMap = new HashMap<String, SegmentReader>();
  getSegmentReaders(multiReader.getSubReaders(), existingSegmentMap);

  // One sub-reader per current segment: reuse when the name is known, open otherwise.
  List<IndexReader> currentReaders = new ArrayList<IndexReader>(segmentInfos.size());
  for (int i = 0; i < segmentInfos.size(); i++) {
    SegmentInfo segmentInfo = segmentInfos.info(i);
    SegmentReader segmentReader = existingSegmentMap.get(segmentInfo.name);
    if (segmentReader == null) {
      // it's new
      segmentReader = SegmentReader.get(segmentInfo);
    }
    currentReaders.add(segmentReader);
  }

  return new MultiReader(indexReader.directory(), segmentInfos, false,
      currentReaders.toArray(new IndexReader[0]));
}
/**
 * Walks {@code indexReaders} and records every SegmentReader found in {@code map},
 * keyed by the reader's {@code segment} field.
 *
 * <p>A MultiReader element is not recorded itself; its sub-readers are visited
 * recursively instead. Elements of any other type are skipped.
 *
 * @param indexReaders the readers to inspect
 * @param map receives one entry per SegmentReader encountered
 */
public static void getSegmentReaders(IndexReader[] indexReaders,
    Map<String,SegmentReader> map) {
  for (IndexReader candidate : indexReaders) {
    if (candidate instanceof MultiReader) {
      // Descend into the composite reader.
      getSegmentReaders(((MultiReader) candidate).getSubReaders(), map);
      continue;
    }
    if (candidate instanceof SegmentReader) {
      SegmentReader leaf = (SegmentReader) candidate;
      map.put(leaf.segment, leaf);
    }
  }
}
package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.store.Directory;
/**
 * A MultiReader that keeps a reference to the sub-reader array it was constructed
 * with, so that IndexReaderUtils.reopen() can get at the contained IndexReaders.
 */
public class MyMultiReader extends MultiReader {

  /** The array passed to the constructor; stored as-is, not copied. */
  private IndexReader[] containedReaders;

  /**
   * Creates the reader and remembers {@code subReaders}.
   *
   * <p>The third superclass constructor argument is always {@code true}
   * (NOTE(review): presumably the close-directory flag; confirm against MultiReader).
   *
   * @param directory passed through to the MultiReader constructor
   * @param infos passed through to the MultiReader constructor
   * @param subReaders the readers this instance is composed of
   * @throws IOException if the superclass constructor fails
   */
  public MyMultiReader(Directory directory, SegmentInfos infos, IndexReader[] subReaders)
      throws IOException {
    super(directory, infos, true, subReaders);
    this.containedReaders = subReaders;
  }

  /**
   * Returns the same array instance that was given to the constructor.
   *
   * @return the contained sub-readers
   */
  public IndexReader[] getReaders() {
    return this.containedReaders;
  }

  /**
   * Declared public in this class; does nothing beyond delegating to the superclass.
   *
   * @throws IOException if the superclass commit fails
   */
  public void doCommit() throws IOException {
    super.doCommit();
  }
}
package org.apache.lucene.index;
import java.io.IOException;
import java.util.*;
import org.apache.lucene.store.*;
/**
 * Static helpers for opening an index and for re-opening an existing reader while
 * reusing the segment readers whose segments are still part of the index.
 *
 * <p>The public entry points are {@code static synchronized}, so calls into this
 * class are serialized on the class lock.
 */
public class IndexReaderUtils {

  /**
   * Maps each segment reader tracked by this class to the SegmentInfo it was opened
   * from. A WeakHashMap is used, so an entry does not keep its key reader reachable.
   */
  private static final Map segments = new WeakHashMap();

  static {
    // must use String class name, otherwise instantiation order will not allow the override to work
    System.setProperty("org.apache.lucene.SegmentReader.class","org.apache.lucene.index.MySegmentReader");
  }

  /**
   * Reopens the IndexReader, reusing already-open segment readers where possible.
   *
   * <p>The segment list is re-read while holding the commit lock.
   * <ul>
   * <li>If {@code ir} is a MyMultiReader, each of its sub-readers that this class
   * tracks and whose segment name is still listed is kept (after its
   * {@code reopen()} is called); every other sub-reader is closed. Missing
   * segments are opened fresh. When exactly one segment is listed, that single
   * segment reader is returned directly; otherwise a new MyMultiReader is.</li>
   * <li>For any other reader type nothing can be reused: all segments are opened
   * fresh and returned in a new MyMultiReader.
   * NOTE(review): {@code ir} itself is not closed on this path. Closing it here
   * could also close a Directory it owns, so that is left to the caller; confirm
   * the intended ownership.</li>
   * </ul>
   * In both cases the caller should stop using {@code ir} and use the returned reader.
   *
   * @param ir the reader to refresh
   * @return the new IndexReader
   * @throws IOException if the segment list or a segment cannot be read, or per
   *         whatever Lock.With.run() raises while acquiring the commit lock
   */
  public static synchronized IndexReader reopen(final IndexReader ir) throws IOException {
    final Directory directory = ir.directory();
    synchronized (directory) { // in- & inter-process sync
      return (IndexReader) new Lock.With(
          directory.makeLock(IndexWriter.COMMIT_LOCK_NAME),
          IndexWriter.COMMIT_LOCK_TIMEOUT) {
        public Object doBody() throws IOException {
          SegmentInfos infos = new SegmentInfos();
          infos.read(directory);
          if (!(ir instanceof MyMultiReader)) {
            // Unknown reader type: nothing to reuse. Readers are registered by
            // openAll() so that the NEXT reopen can reuse them.
            return new MyMultiReader(directory, infos, openAll(infos));
          }
          return refresh((MyMultiReader) ir, directory, infos);
        }
      }.run();
    }
  }

  /**
   * Opens the index at {@code path}.
   *
   * @param path file-system location handed to FSDirectory.getDirectory(path, false)
   * @return a reader over the index
   * @throws IOException if the index cannot be read
   */
  public static synchronized IndexReader open(String path) throws IOException {
    Directory d = FSDirectory.getDirectory(path,false);
    return open(d,true);
  }

  /**
   * Opens all segments of {@code directory} while holding the commit lock. A
   * single-segment index yields that segment's reader directly; otherwise the
   * segment readers are wrapped in a MyMultiReader.
   *
   * NOTE(review): {@code closeDirectory} is accepted but not used anywhere in this
   * method.
   */
  private static IndexReader open(final Directory directory, final boolean closeDirectory) throws IOException {
    synchronized (directory) { // in- & inter-process sync
      return (IndexReader) new Lock.With(
          directory.makeLock(IndexWriter.COMMIT_LOCK_NAME),
          IndexWriter.COMMIT_LOCK_TIMEOUT) {
        public Object doBody() throws IOException {
          SegmentInfos infos = new SegmentInfos();
          infos.read(directory);
          if (infos.size() == 1) { // index is optimized
            return MySegmentReader.get(infos.info(0));
          }
          return new MyMultiReader(directory, infos, openAll(infos));
        }
      }.run();
    }
  }

  /** Opens a fresh reader for every segment in {@code infos} and registers each in {@link #segments}. */
  private static IndexReader[] openAll(SegmentInfos infos) throws IOException {
    IndexReader[] readers = new IndexReader[infos.size()];
    for (int i = 0; i < readers.length; i++) {
      SegmentInfo si = infos.info(i);
      readers[i] = MySegmentReader.get(si);
      segments.put(readers[i], si);
    }
    return readers;
  }

  /**
   * Builds the reader set for {@code infos} out of {@code mr}'s sub-readers, keeping
   * the ones whose segment is still listed and closing the rest.
   */
  private static IndexReader refresh(MyMultiReader mr, Directory directory, SegmentInfos infos)
      throws IOException {
    IndexReader[] oldreaders = mr.getReaders();
    boolean[] stayopen = new boolean[oldreaders.length];
    IndexReader[] readers = new IndexReader[infos.size()];

    for (int i = 0; i < readers.length; i++) {
      SegmentInfo newsi = infos.info(i);
      int j = indexOfSegment(oldreaders, newsi.name);
      if (j >= 0) {
        // Only readers created through MySegmentReader.get() are ever registered
        // in 'segments', so a hit is always a MySegmentReader.
        MySegmentReader kept = (MySegmentReader) oldreaders[j];
        kept.reopen();
        readers[i] = kept;
        stayopen[j] = true;
      } else {
        readers[i] = MySegmentReader.get(newsi);
        segments.put(readers[i], newsi);
      }
    }

    // Release every old sub-reader that was not carried over. This now also
    // happens for a single-segment index, which previously skipped it.
    for (int j = 0; j < stayopen.length; j++) {
      if (!stayopen[j]) {
        oldreaders[j].close();
      }
    }

    if (readers.length == 1) { // index is optimized
      return readers[0];
    }
    return new MyMultiReader(directory, infos, readers);
  }

  /**
   * Returns the index of the first reader in {@code oldreaders} that is registered
   * in {@link #segments} under segment name {@code name}, or -1 if there is none.
   */
  private static int indexOfSegment(IndexReader[] oldreaders, String name) {
    for (int j = 0; j < oldreaders.length; j++) {
      SegmentInfo si = (SegmentInfo) segments.get(oldreaders[j]);
      if (si != null && si.name.equals(name)) {
        return j;
      }
    }
    return -1;
  }
}
package org.apache.lucene.index;
import java.io.IOException;
import org.apache.lucene.util.BitVector;
/**
 * SegmentReader subclass that remembers the SegmentInfo it was opened from, so
 * that its deleted-documents bit vector can be re-read in place via {@link #reopen()}.
 */
public class MySegmentReader extends SegmentReader {

  /** The segment this reader was opened on; assigned by {@link #get(SegmentInfo)}. */
  SegmentInfo si;

  /** No-argument constructor; performs no initialization of its own. */
  public MySegmentReader() {
  }

  /**
   * Re-reads the deletions of this segment. When {@code hasDeletions(si)} is true,
   * {@code deletedDocs} is replaced by a new BitVector loaded from the file
   * {@code <segment name>.del}; otherwise nothing changes.
   *
   * @throws IOException if the deletions file cannot be read
   */
  public void reopen() throws IOException {
    if (!hasDeletions(si)) {
      return;
    }
    String deletionsFile = si.name + ".del";
    deletedDocs = new BitVector(directory(), deletionsFile);
  }

  /**
   * Obtains a reader for {@code si} through SegmentReader.get and records
   * {@code si} on it.
   *
   * <p>The result of SegmentReader.get is cast to MySegmentReader.
   * NOTE(review): this presumably relies on the
   * {@code org.apache.lucene.SegmentReader.class} system property naming this
   * class before the first call; confirm.
   *
   * @param si the segment to open
   * @return the reader, with its {@code si} field set
   * @throws IOException if the segment cannot be opened
   */
  public static SegmentReader get(SegmentInfo si) throws IOException {
    SegmentReader created = SegmentReader.get(si);
    MySegmentReader mine = (MySegmentReader) created;
    mine.si = si;
    return mine;
  }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]