Hi Guys,
I would like to fix a class in
contrib/misc/src/java/org/apache/lucene/index called IndexSplitter. It
has a bug: when it splits segments into a separate index, the segment
descriptor file contains wrong data - the number (the name) of the next
segment to generate is 0. Although this may not cause an exception in some
cases (depending on the existing segment names and the number of newly
generated ones), in most cases it does cause an exception.
I do not know if I have the rights to submit this fix to the Lucene
contrib dir, but I am attaching the fix and a test that shows the
exception when using the original class and no exception when using
the fixed class.
Cheers,
Ivan
/**
*
*/
package test;
import java.io.File;
import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexSplitter;
import org.apache.lucene.index.IndexSplitterFixed;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
* @author ivasilev
*
*/
public class TestIndexSplitter {
private static File INDEX_PATH = new
File("E:/Temp/ContribIndexSpliter/index");
private static File INDEX_SPLIT_PATH = new
File("E:/Temp/ContribIndexSpliter/splitIndex");
private static File INDEX_SPLIT_FIXED_PATH = new
File("E:/Temp/ContribIndexSpliter/splitFixedIndex");
public static void main(String[] args) throws IOException {
initDirs();
createIndex();
splitIndexWithBothSplitters();
deleteFirstDocAndOptimize(INDEX_SPLIT_FIXED_PATH);
readIndex(INDEX_SPLIT_FIXED_PATH);
deleteFirstDocAndOptimize(INDEX_SPLIT_PATH); // might throw
exception
readIndex(INDEX_SPLIT_PATH); // surely throws Exception
}
private static void initDirs() {
initDir(INDEX_PATH);
initDir(INDEX_SPLIT_PATH);
initDir(INDEX_SPLIT_FIXED_PATH);
}
private static void initDir(File dir) {
if ( ! dir.exists()) {
dir.mkdirs();
}
for (File currFile : dir.listFiles()) {
if (currFile.isFile()) {
currFile.delete();
}
}
}
private static void createIndex() throws IOException {
IndexWriter iw = null;
try {
IndexWriterConfig iwConfig = new
IndexWriterConfig(Version.LUCENE_32,
new StandardAnalyzer(Version.LUCENE_32));
iwConfig.setOpenMode(OpenMode.CREATE);
iw = new IndexWriter(FSDirectory.open(INDEX_PATH),
iwConfig);
Document doc = new Document();
doc.add(new Field("content", "doc 1", Store.YES,
Index.ANALYZED_NO_NORMS));
iw.addDocument(doc);
doc = new Document();
doc.add(new Field("content", "doc 2", Store.YES,
Index.ANALYZED_NO_NORMS));
iw.addDocument(doc);
iw.close();
} finally {
if (iw != null) {
iw.close();
}
}
}
private static void splitIndexWithBothSplitters() throws IOException {
IndexSplitter is = new IndexSplitter(INDEX_PATH);
is.split(INDEX_SPLIT_PATH, new String[] { "_0" });
IndexSplitterFixed isf = new IndexSplitterFixed(INDEX_PATH);
isf.split(INDEX_SPLIT_FIXED_PATH, new String[] { "_0" });
}
private static void deleteFirstDocAndOptimize(File indexDir) throws
IOException {
IndexReader ir = null;
IndexWriter iw = null;
try {
ir = IndexReader.open(FSDirectory.open(indexDir),
false);
ir.deleteDocument(0);
ir.close();
IndexWriterConfig iwConfig = new
IndexWriterConfig(Version.LUCENE_32,
new StandardAnalyzer(Version.LUCENE_32));
iw = new IndexWriter(FSDirectory.open(indexDir),
iwConfig);
iw.optimize();
} finally {
if (ir != null) {
ir.close();
}
if (iw != null) {
iw.close();
}
}
}
private static void readIndex(File indexDir) throws IOException {
IndexReader ir = null;
try {
ir = IndexReader.open(FSDirectory.open(indexDir));
System.out.println(indexDir.getPath() + " index ->
ir.numDocs = " + ir.numDocs());
} finally {
if (ir != null) {
ir.close();
}
}
}
}
/**
*
*/
package org.apache.lucene.index;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;
import org.apache.lucene.store.FSDirectory;
/**
* @author ivasilev
*
*/
public class IndexSplitterFixed extends IndexSplitter {
public IndexSplitterFixed(File dir) throws IOException {
super(dir);
}
public void split(File destDir, String[] segs) throws IOException {
destDir.mkdirs();
FSDirectory destFSDir = FSDirectory.open(destDir);
SegmentInfos destInfos = new SegmentInfos();
destInfos.counter = nextSegmentName(segs); // the fix
for (String n : segs) {
SegmentInfo info = getInfo(n);
destInfos.add(info);
// now copy files over
List<String> files = info.files();
for (final String srcName : files) {
File srcFile = new File(dir, srcName);
File destFile = new File(destDir, srcName);
copyFile(srcFile, destFile);
}
}
destInfos.changed();
destInfos.commit(destFSDir);
// System.out.println("destDir:"+destDir.getAbsolutePath());
}
private int nextSegmentName(String ... segs) {
int ret = 0;
for (String currSeg : segs) {
int currSegNum = Integer.parseInt(currSeg.substring(1),
36);
if (ret < currSegNum) {
ret = currSegNum;
}
}
return ++ret;
}
// following methods just copyied from IndexSplitter (as there are
private)
private SegmentInfo getInfo(String name) {
for (int x = 0; x < infos.size(); x++) {
if (name.equals(infos.info(x).name))
return infos.info(x);
}
return null;
}
private static final byte[] copyBuffer = new byte[32*1024];
private static void copyFile(File src, File dst) throws IOException {
InputStream in = new FileInputStream(src);
OutputStream out = new FileOutputStream(dst);
int len;
while ((len = in.read(copyBuffer)) > 0) {
out.write(copyBuffer, 0, len);
}
in.close();
out.close();
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org
For additional commands, e-mail: java-user-h...@lucene.apache.org