Hi Guys,

I would like to fix a class in contrib/misc/src/java/org/apache/lucene/index called IndexSplitter. It has a bug: when it splits segments into a separate index, the segment descriptor file of the new index contains wrong data - the number (the name) of the next segment to generate is 0. In some cases this does not cause an exception (it depends on the existing segment names and on the number of newly generated segments), but in most cases it does cause an exception.

I do not know whether I have the rights to submit this fix to the Lucene contrib directory, but I am attaching the fix together with a test. The test shows that an exception is thrown when the original class is used and that no exception is thrown when the fixed class is used.

Cheers,
Ivan
/**
 * 
 */
package test;

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexSplitter;
import org.apache.lucene.index.IndexSplitterFixed;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * @author ivasilev
 *
 */
public class TestIndexSplitter {
        
        private static File INDEX_PATH = new 
File("E:/Temp/ContribIndexSpliter/index");
        private static File INDEX_SPLIT_PATH = new 
File("E:/Temp/ContribIndexSpliter/splitIndex");
        private static File INDEX_SPLIT_FIXED_PATH = new 
File("E:/Temp/ContribIndexSpliter/splitFixedIndex");
        
        public static void main(String[] args) throws IOException {
                initDirs();
                createIndex();
                splitIndexWithBothSplitters();
                deleteFirstDocAndOptimize(INDEX_SPLIT_FIXED_PATH);
                readIndex(INDEX_SPLIT_FIXED_PATH);
                deleteFirstDocAndOptimize(INDEX_SPLIT_PATH); // might throw 
exception
                readIndex(INDEX_SPLIT_PATH); // surely throws Exception
        }
        
        private static void initDirs() {
                initDir(INDEX_PATH);
                initDir(INDEX_SPLIT_PATH);
                initDir(INDEX_SPLIT_FIXED_PATH);
        }
        
        private static void initDir(File dir) {
                if ( ! dir.exists()) {
                        dir.mkdirs();
                }
                for (File currFile : dir.listFiles()) {
                        if (currFile.isFile()) {
                                currFile.delete();
                        }
                }
        }
        
        private static void createIndex() throws IOException {
                IndexWriter iw = null;
                try {
                        IndexWriterConfig iwConfig = new 
IndexWriterConfig(Version.LUCENE_32,
                                new StandardAnalyzer(Version.LUCENE_32));
                        iwConfig.setOpenMode(OpenMode.CREATE);
                        iw = new IndexWriter(FSDirectory.open(INDEX_PATH), 
iwConfig);
                        Document doc = new Document();
                        doc.add(new Field("content", "doc 1", Store.YES, 
Index.ANALYZED_NO_NORMS));
                        iw.addDocument(doc);
                        doc = new Document();
                        doc.add(new Field("content", "doc 2", Store.YES, 
Index.ANALYZED_NO_NORMS));
                        iw.addDocument(doc);
                        iw.close();
                } finally {
                        if (iw != null) {
                                iw.close();
                        }
                }
        }
        
        private static void splitIndexWithBothSplitters() throws IOException {
                IndexSplitter is = new IndexSplitter(INDEX_PATH);
                is.split(INDEX_SPLIT_PATH, new String[] { "_0" });
                IndexSplitterFixed isf = new IndexSplitterFixed(INDEX_PATH);
                isf.split(INDEX_SPLIT_FIXED_PATH, new String[] { "_0" });
        }
        
        private static void deleteFirstDocAndOptimize(File indexDir) throws 
IOException {
                IndexReader ir = null;
                IndexWriter iw = null;
                try {
                        ir = IndexReader.open(FSDirectory.open(indexDir), 
false);
                        ir.deleteDocument(0);
                        ir.close();
                        IndexWriterConfig iwConfig = new 
IndexWriterConfig(Version.LUCENE_32,
                                new StandardAnalyzer(Version.LUCENE_32));
                        iw = new IndexWriter(FSDirectory.open(indexDir), 
iwConfig);
                        iw.optimize();
                } finally {
                        if (ir != null) {
                                ir.close();
                        }
                        if (iw != null) {
                                iw.close();
                        }
                }
        }
        
        private static void readIndex(File indexDir) throws IOException {
                IndexReader ir = null;
                try {
                        ir = IndexReader.open(FSDirectory.open(indexDir));
                        System.out.println(indexDir.getPath() + " index -> 
ir.numDocs = " + ir.numDocs());
                } finally {
                        if (ir != null) {
                                ir.close();
                        }
                }
        }
        
}
/**
 * 
 */
package org.apache.lucene.index;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;

import org.apache.lucene.store.FSDirectory;

/**
 * @author ivasilev
 *
 */
public class IndexSplitterFixed extends IndexSplitter {
        
        public IndexSplitterFixed(File dir) throws IOException {
                super(dir);
        }
        
        public void split(File destDir, String[] segs) throws IOException {
            destDir.mkdirs();
            FSDirectory destFSDir = FSDirectory.open(destDir);
            SegmentInfos destInfos = new SegmentInfos();
            destInfos.counter = nextSegmentName(segs); // the fix
            for (String n : segs) {
              SegmentInfo info = getInfo(n);
              destInfos.add(info);
              // now copy files over
              List<String> files = info.files();
              for (final String srcName : files) {
                File srcFile = new File(dir, srcName);
                File destFile = new File(destDir, srcName);
                copyFile(srcFile, destFile);
              }
            }
            destInfos.changed();
            destInfos.commit(destFSDir);
            // System.out.println("destDir:"+destDir.getAbsolutePath());
        }
        
        private int nextSegmentName(String ... segs) {
                int ret = 0;
                for (String currSeg : segs) {
                        int currSegNum = Integer.parseInt(currSeg.substring(1), 
36);
                        if (ret < currSegNum) {
                                ret = currSegNum;
                        }
                }
                return ++ret;
        }
        
        // following methods just copyied from IndexSplitter (as there are 
private)

        private SegmentInfo getInfo(String name) {
            for (int x = 0; x < infos.size(); x++) {
                if (name.equals(infos.info(x).name))
                return infos.info(x);
            }
            return null;
        }
          
        private static final byte[] copyBuffer = new byte[32*1024];
         
        private static void copyFile(File src, File dst) throws IOException {
            InputStream in = new FileInputStream(src);
            OutputStream out = new FileOutputStream(dst);
            int len;
            while ((len = in.read(copyBuffer)) > 0) {
                out.write(copyBuffer, 0, len);
            }
            in.close();
            out.close();
        }
        
}

---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscr...@lucene.apache.org
For additional commands, e-mail: java-user-h...@lucene.apache.org

Reply via email to