goller 2004/04/20 06:47:58 Modified: src/java/org/apache/lucene/index SegmentTermEnum.java SegmentTermDocs.java SegmentMerger.java TermInfosWriter.java Log: hopefully corrected or at least improved version of skipTo Revision Changes Path 1.7 +26 -8 jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java Index: SegmentTermEnum.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermEnum.java,v retrieving revision 1.6 retrieving revision 1.7 diff -u -r1.6 -r1.7 --- SegmentTermEnum.java 19 Apr 2004 14:46:00 -0000 1.6 +++ SegmentTermEnum.java 20 Apr 2004 13:47:58 -0000 1.7 @@ -33,6 +33,7 @@ long indexPointer = 0; int indexInterval; int skipInterval; + private int formatM1SkipInterval; Term prev; private char[] buffer = {}; @@ -51,7 +52,7 @@ // back-compatible settings indexInterval = 128; - skipInterval = Integer.MAX_VALUE; + skipInterval = Integer.MAX_VALUE; // switch off skipTo optimization } else { // we have a format version number @@ -62,8 +63,17 @@ throw new IOException("Unknown format version:" + format); size = input.readLong(); // read the size - - if (!isIndex) { + + if(format == -1){ + if (!isIndex) { + indexInterval = input.readInt(); + formatM1SkipInterval = input.readInt(); + } + // switch off skipTo optimization for file format prior to 1.4rc2 in order to avoid a bug in + // skipTo implementation of these versions + skipInterval = Integer.MAX_VALUE; + } + else{ indexInterval = input.readInt(); skipInterval = input.readInt(); } @@ -107,13 +117,21 @@ termInfo.docFreq = input.readVInt(); // read doc freq termInfo.freqPointer += input.readVLong(); // read freq pointer termInfo.proxPointer += input.readVLong(); // read prox pointer - - if (!isIndex) { - if (termInfo.docFreq > skipInterval) { - termInfo.skipOffset = input.readVInt(); + + if(format == -1){ + // just read skipOffset in order to increment file pointer; + // value is never used since skipTo 
is switched off + if (!isIndex) { + if (termInfo.docFreq > formatM1SkipInterval) { + termInfo.skipOffset = input.readVInt(); + } } } - + else{ + if (termInfo.docFreq >= skipInterval) + termInfo.skipOffset = input.readVInt(); + } + if (isIndex) indexPointer += input.readVLong(); // read index pointer 1.12 +3 -1 jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermDocs.java Index: SegmentTermDocs.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermDocs.java,v retrieving revision 1.11 retrieving revision 1.12 diff -u -r1.11 -r1.12 --- SegmentTermDocs.java 19 Apr 2004 19:32:20 -0000 1.11 +++ SegmentTermDocs.java 20 Apr 2004 13:47:58 -0000 1.12 @@ -84,6 +84,8 @@ public void close() throws IOException { freqStream.close(); + if (skipStream != null) + skipStream.close(); } public final int doc() { return doc; } @@ -143,7 +145,7 @@ /** Optimized implementation. */ public boolean skipTo(int target) throws IOException { - if (df > skipInterval) { // optimized case + if (df >= skipInterval) { // optimized case if (skipStream == null) skipStream = (InputStream) freqStream.clone(); // lazily clone 1.10 +3 -2 jakarta-lucene/src/java/org/apache/lucene/index/SegmentMerger.java Index: SegmentMerger.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentMerger.java,v retrieving revision 1.9 retrieving revision 1.10 diff -u -r1.9 -r1.10 --- SegmentMerger.java 29 Mar 2004 22:48:02 -0000 1.9 +++ SegmentMerger.java 20 Apr 2004 13:47:58 -0000 1.10 @@ -234,6 +234,7 @@ private OutputStream freqOutput = null; private OutputStream proxOutput = null; private TermInfosWriter termInfosWriter = null; + private int skipInterval; private SegmentMergeQueue queue = null; private final void mergeTerms() throws IOException { @@ -242,6 +243,8 @@ proxOutput = directory.createFile(segment + ".prx"); 
termInfosWriter = new TermInfosWriter(directory, segment, fieldInfos); + skipInterval = termInfosWriter.skipInterval; + queue = new SegmentMergeQueue(readers.size()); mergeTermInfos(); @@ -254,7 +257,6 @@ } private final void mergeTermInfos() throws IOException { - queue = new SegmentMergeQueue(readers.size()); int base = 0; for (int i = 0; i < readers.size(); i++) { IndexReader reader = (IndexReader) readers.elementAt(i); @@ -327,7 +329,6 @@ */ private final int appendPostings(SegmentMergeInfo[] smis, int n) throws IOException { - final int skipInterval = termInfosWriter.skipInterval; int lastDoc = 0; int df = 0; // number of docs w/ term resetSkip(); 1.6 +6 -10 jakarta-lucene/src/java/org/apache/lucene/index/TermInfosWriter.java Index: TermInfosWriter.java =================================================================== RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/TermInfosWriter.java,v retrieving revision 1.5 retrieving revision 1.6 diff -u -r1.5 -r1.6 --- TermInfosWriter.java 25 Mar 2004 13:49:49 -0000 1.5 +++ TermInfosWriter.java 20 Apr 2004 13:47:58 -0000 1.6 @@ -27,13 +27,13 @@ final class TermInfosWriter { /** The file format version, a negative number. */ - public static final int FORMAT = -1; + public static final int FORMAT = -2; private FieldInfos fieldInfos; private OutputStream output; private Term lastTerm = new Term("", ""); private TermInfo lastTi = new TermInfo(); - private int size = 0; + private long size = 0; // TODO: the default values for these two parameters should be settable from // IndexWriter. However, once that's done, folks will start setting them to @@ -80,10 +80,8 @@ output = directory.createFile(segment + (isIndex ? 
".tii" : ".tis")); output.writeInt(FORMAT); // write format output.writeLong(0); // leave space for size - if (!isIndex) { - output.writeInt(indexInterval); // write indexInterval - output.writeInt(skipInterval); // write skipInterval - } + output.writeInt(indexInterval); // write indexInterval + output.writeInt(skipInterval); // write skipInterval } /** Adds a new <Term, TermInfo> pair to the set. @@ -106,10 +104,8 @@ output.writeVLong(ti.freqPointer - lastTi.freqPointer); // write pointers output.writeVLong(ti.proxPointer - lastTi.proxPointer); - if (!isIndex) { - if (ti.docFreq > skipInterval) { - output.writeVInt(ti.skipOffset); - } + if (ti.docFreq >= skipInterval) { + output.writeVInt(ti.skipOffset); } if (isIndex) {
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]