This is a pretty big change, so I thought I'd let folks try it before I submit it. If anyone tries it, can you please report back on whether it breaks anything? I've already checked that it passes unit tests...
I made these changes a while ago. They conflicted with some changes that Dmitry made recently, but I think I resolved all the conflicts correctly. So, Dmitry in particular, do these look correct?
Thanks,
Doug
? patch.txt
? patches.txt
? testIndex
? TestDoc
Index: CHANGES.txt
===================================================================
RCS file: /home/cvs/jakarta-lucene/CHANGES.txt,v
retrieving revision 1.54
diff -u -u -r1.54 CHANGES.txt
--- CHANGES.txt 3 Oct 2003 15:16:24 -0000 1.54
+++ CHANGES.txt 17 Oct 2003 18:51:16 -0000
@@ -40,6 +40,18 @@
10. Added Locale setting to QueryParser, for use by date range parsing.
+11. Changed IndexReader so that it can be subclassed by classes
+ outside of its package. Previously it had package-private
+ abstract methods. Also modified the index merging code so that it
+ can work on an arbitrary IndexReader implementation, and added a
+ new method, IndexWriter.addIndexes(IndexReader[]), to take
+ advantage of this. (cutting)
+
+12. Added a limit to the number of clauses which may be added to a
+ BooleanQuery. The default limit is 1024 clauses. This should
+ stop most OutOfMemoryExceptions by prefix, wildcard and fuzzy
+ queries which run amok. (cutting)
+
1.3 RC1
Index: src/java/org/apache/lucene/index/FieldInfos.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/FieldInfos.java,v
retrieving revision 1.3
diff -u -u -r1.3 FieldInfos.java
--- src/java/org/apache/lucene/index/FieldInfos.java 30 Apr 2002 15:08:10 -0000 1.3
+++ src/java/org/apache/lucene/index/FieldInfos.java 17 Oct 2003 18:51:17 -0000
@@ -57,6 +57,8 @@
import java.util.Hashtable;
import java.util.Vector;
import java.util.Enumeration;
+import java.util.Collection;
+import java.util.Iterator;
import java.io.IOException;
import org.apache.lucene.document.Document;
@@ -92,11 +94,10 @@
}
}
- /** Merges in information from another FieldInfos. */
- final void add(FieldInfos other) {
- for (int i = 0; i < other.size(); i++) {
- FieldInfo fi = other.fieldInfo(i);
- add(fi.name, fi.isIndexed);
+ final void add(Collection names, boolean isIndexed) {
+ Iterator i = names.iterator();
+ while (i.hasNext()) {
+ add((String)i.next(), isIndexed);
}
}
Index: src/java/org/apache/lucene/index/FilterIndexReader.java
===================================================================
RCS file: src/java/org/apache/lucene/index/FilterIndexReader.java
diff -N src/java/org/apache/lucene/index/FilterIndexReader.java
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/java/org/apache/lucene/index/FilterIndexReader.java 17 Oct 2003 18:51:17 -0000
@@ -0,0 +1,149 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2003 The Apache Software Foundation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact [EMAIL PROTECTED]
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+import java.io.IOException;
+import java.util.Collection;
+
+import org.apache.lucene.document.Document;
+
+/** A <code>FilterIndexReader</code> contains another IndexReader, which it
+ * uses as its basic source of data, possibly transforming the data along the
+ * way or providing additional functionality. The class
+ * <code>FilterIndexReader</code> itself simply implements all abstract methods
+ * of <code>IndexReader</code> with versions that pass all requests to the
+ * contained index reader. Subclasses of <code>FilterIndexReader</code> may
+ * further override some of these methods and may also provide additional
+ * methods and fields.
+*/
+public class FilterIndexReader extends IndexReader {
+
+ /** Base class for filtering [EMAIL PROTECTED] TermDocs} implementations. */
+ public static class FilterTermDocs implements TermDocs {
+ protected TermDocs in;
+
+ public FilterTermDocs(TermDocs in) { this.in = in; }
+
+ public void seek(Term term) throws IOException { in.seek(term); }
+ public void seek(TermEnum enum) throws IOException { in.seek(enum); }
+ public int doc() { return in.doc(); }
+ public int freq() { return in.freq(); }
+ public boolean next() throws IOException { return in.next(); }
+ public int read(int[] docs, int[] freqs) throws IOException {
+ return in.read(docs, freqs);
+ }
+ public boolean skipTo(int i) throws IOException { return in.skipTo(i); }
+ public void close() throws IOException { in.close(); }
+ }
+
+ /** Base class for filtering [EMAIL PROTECTED] TermPositions} implementations. */
+ public static class FilterTermPositions
+ extends FilterTermDocs implements TermPositions {
+
+ public FilterTermPositions(TermPositions in) { super(in); }
+
+ public int nextPosition() throws IOException {
+ return ((TermPositions)in).nextPosition();
+ }
+ }
+
+ /** Base class for filtering [EMAIL PROTECTED] TermEnum} implementations. */
+ public static class FilterTermEnum extends TermEnum {
+ protected TermEnum in;
+
+ public FilterTermEnum(TermEnum in) { this.in = in; }
+
+ public boolean next() throws IOException { return in.next(); }
+ public Term term() { return in.term(); }
+ public int docFreq() { return in.docFreq(); }
+ public void close() throws IOException { in.close(); }
+ }
+
+ protected IndexReader in;
+
+ public FilterIndexReader(IndexReader in) {
+ super(in.directory());
+ this.in = in;
+ }
+
+ public int numDocs() { return in.numDocs(); }
+ public int maxDoc() { return in.maxDoc(); }
+
+ public Document document(int n) throws IOException {return in.document(n);}
+
+ public boolean isDeleted(int n) { return in.isDeleted(n); }
+ public boolean hasDeletions() { return in.hasDeletions(); }
+
+ public byte[] norms(String f) throws IOException { return in.norms(f); }
+
+ public TermEnum terms() throws IOException { return in.terms(); }
+ public TermEnum terms(Term t) throws IOException { return in.terms(t); }
+
+ public int docFreq(Term t) throws IOException { return in.docFreq(t); }
+
+ public TermDocs termDocs() throws IOException { return in.termDocs(); }
+ public TermPositions termPositions() throws IOException {
+ return in.termPositions();
+ }
+
+ protected void doDelete(int n) throws IOException { in.doDelete(n); }
+ protected void doClose() throws IOException { in.doClose(); }
+
+ public Collection getFieldNames() throws IOException {
+ return in.getFieldNames();
+ }
+ public Collection getFieldNames(boolean indexed) throws IOException {
+ return in.getFieldNames(indexed);
+ }
+}
Index: src/java/org/apache/lucene/index/IndexReader.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java,v
retrieving revision 1.20
diff -u -u -r1.20 IndexReader.java
--- src/java/org/apache/lucene/index/IndexReader.java 17 Oct 2003 10:49:42 -0000 1.20
+++ src/java/org/apache/lucene/index/IndexReader.java 17 Oct 2003 18:51:17 -0000
@@ -86,7 +86,7 @@
segmentInfosAge = Long.MAX_VALUE;
}
- Directory directory;
+ private Directory directory;
private Lock writeLock;
//used to determine whether index has chaged since reader was opened
@@ -131,6 +131,9 @@
}
}
+ /** Returns the directory this index resides in. */
+ public Directory directory() { return directory; }
+
/** Returns the time the index in the named directory was last modified. */
public static long lastModified(String directory) throws IOException {
return lastModified(new File(directory));
@@ -194,6 +197,9 @@
/** Returns true if document <i>n</i> has been deleted */
public abstract boolean isDeleted(int n);
+ /** Returns true if any documents have been deleted */
+ public abstract boolean hasDeletions();
+
/** Returns the byte-encoded normalization factor for the named field of
* every document. This is used by the search code to score documents.
*
@@ -286,7 +292,10 @@
doDelete(docNum);
}
- abstract void doDelete(int docNum) throws IOException;
+ /** Implements deletion of the document numbered <code>docNum</code>.
+ * Applications should call [EMAIL PROTECTED] #delete(int)} or [EMAIL PROTECTED] #delete(Term)}.
+ */
+ protected abstract void doDelete(int docNum) throws IOException;
/** Deletes all documents containing <code>term</code>.
This is useful if one uses a document field to hold a unique ID string for
@@ -323,7 +332,7 @@
}
/** Implements close. */
- abstract void doClose() throws IOException;
+ protected abstract void doClose() throws IOException;
/** Release the write lock, if needed. */
protected final void finalize() throws IOException {
Index: src/java/org/apache/lucene/index/IndexWriter.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/IndexWriter.java,v
retrieving revision 1.18
diff -u -u -r1.18 IndexWriter.java
--- src/java/org/apache/lucene/index/IndexWriter.java 17 Oct 2003 10:49:42 -0000 1.18
+++ src/java/org/apache/lucene/index/IndexWriter.java 17 Oct 2003 18:51:18 -0000
@@ -324,6 +324,37 @@
optimize(); // final cleanup
}
+ /** Merges the provided indexes into this index.
+ * <p>After this completes, the index is optimized. */
+ public synchronized void addIndexes(IndexReader[] readers)
+ throws IOException {
+
+ optimize(); // start with zero or 1 seg
+
+ String mergedName = newSegmentName();
+ SegmentMerger merger = new SegmentMerger(directory, mergedName, false);
+
+ if (segmentInfos.size() == 1) // add existing index, if any
+ merger.add(new SegmentReader(segmentInfos.info(0)));
+
+ for (int i = 0; i < readers.length; i++) // add new indexes
+ merger.add(readers[i]);
+
+ int docCount = merger.merge(); // merge 'em
+
+ segmentInfos.setSize(0); // pop old infos & add new
+ segmentInfos.addElement(new SegmentInfo(mergedName, docCount, directory));
+
+ synchronized (directory) { // in- & inter-process sync
+ new Lock.With(directory.makeLock("commit.lock")) {
+ public Object doBody() throws IOException {
+ segmentInfos.write(directory); // commit changes
+ return null;
+ }
+ }.run();
+ }
+ }
+
/** Merges all RAM-resident segments. */
private final void flushRamSegments() throws IOException {
int minSegment = segmentInfos.size()-1;
@@ -379,12 +410,12 @@
for (int i = minSegment; i < segmentInfos.size(); i++) {
SegmentInfo si = segmentInfos.info(i);
if (infoStream != null)
- infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
- SegmentReader reader = new SegmentReader(si);
+ infoStream.print(" " + si.name + " (" + si.docCount + " docs)");
+ IndexReader reader = new SegmentReader(si);
merger.add(reader);
- if ((reader.directory == this.directory) || // if we own the directory
- (reader.directory == this.ramDirectory))
- segmentsToDelete.addElement(reader); // queue segment for deletion
+ if ((reader.directory()==this.directory) || // if we own the directory
+ (reader.directory()==this.ramDirectory))
+ segmentsToDelete.addElement(reader); // queue segment for deletion
mergedDocCount += reader.numDocs();
}
if (infoStream != null) {
@@ -420,10 +451,10 @@
for (int i = 0; i < segments.size(); i++) {
SegmentReader reader = (SegmentReader)segments.elementAt(i);
- if (reader.directory == this.directory)
- deleteFiles(reader.files(), deletable); // try to delete our files
+ if (reader.directory() == this.directory)
+ deleteFiles(reader.files(), deletable); // try to delete our files
else
- deleteFiles(reader.files(), reader.directory); // delete, eg, RAM files
+ deleteFiles(reader.files(), reader.directory()); // delete other files
}
writeDeleteableFiles(deletable); // note files we can't delete
Index: src/java/org/apache/lucene/index/MultipleTermPositions.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/MultipleTermPositions.java,v
retrieving revision 1.2
diff -u -u -r1.2 MultipleTermPositions.java
--- src/java/org/apache/lucene/index/MultipleTermPositions.java 7 Nov 2002 05:55:39 -0000 1.2
+++ src/java/org/apache/lucene/index/MultipleTermPositions.java 17 Oct 2003 18:51:18 -0000
@@ -297,6 +297,11 @@
throw new UnsupportedOperationException();
}
+ public void seek(TermEnum termEnum) throws IOException {
+ throw new UnsupportedOperationException();
+ }
+
+
/**
* Describe <code>read</code> method here.
*
@@ -311,4 +316,5 @@
{
throw new UnsupportedOperationException();
}
+
}
Index: src/java/org/apache/lucene/index/SegmentMergeInfo.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentMergeInfo.java,v
retrieving revision 1.1.1.1
diff -u -u -r1.1.1.1 SegmentMergeInfo.java
--- src/java/org/apache/lucene/index/SegmentMergeInfo.java 18 Sep 2001 16:29:53 -0000 1.1.1.1
+++ src/java/org/apache/lucene/index/SegmentMergeInfo.java 17 Oct 2003 18:51:18 -0000
@@ -60,30 +60,29 @@
final class SegmentMergeInfo {
Term term;
int base;
- SegmentTermEnum termEnum;
- SegmentReader reader;
- SegmentTermPositions postings;
+ TermEnum termEnum;
+ IndexReader reader;
+ TermPositions postings;
int[] docMap = null; // maps around deleted docs
- SegmentMergeInfo(int b, SegmentTermEnum te, SegmentReader r)
+ SegmentMergeInfo(int b, TermEnum te, IndexReader r)
throws IOException {
base = b;
reader = r;
termEnum = te;
term = te.term();
- postings = new SegmentTermPositions(r);
+ postings = reader.termPositions();
- if (reader.deletedDocs != null) {
- // build array which maps document numbers around deletions
- BitVector deletedDocs = reader.deletedDocs;
+ // build array which maps document numbers around deletions
+ if (reader.hasDeletions()) {
int maxDoc = reader.maxDoc();
docMap = new int[maxDoc];
int j = 0;
for (int i = 0; i < maxDoc; i++) {
- if (deletedDocs.get(i))
- docMap[i] = -1;
- else
- docMap[i] = j++;
+ if (reader.isDeleted(i))
+ docMap[i] = -1;
+ else
+ docMap[i] = j++;
}
}
}
Index: src/java/org/apache/lucene/index/SegmentMerger.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentMerger.java,v
retrieving revision 1.4
diff -u -u -r1.4 SegmentMerger.java
--- src/java/org/apache/lucene/index/SegmentMerger.java 13 Oct 2003 14:27:28 -0000 1.4
+++ src/java/org/apache/lucene/index/SegmentMerger.java 17 Oct 2003 18:51:19 -0000
@@ -83,29 +83,32 @@
useCompoundFile = compoundFile;
}
- final void add(SegmentReader reader) {
+ final void add(IndexReader reader) {
readers.addElement(reader);
}
- final SegmentReader segmentReader(int i) {
- return (SegmentReader)readers.elementAt(i);
+ final IndexReader segmentReader(int i) {
+ return (IndexReader)readers.elementAt(i);
}
- final void merge() throws IOException {
+ final int merge() throws IOException {
+ int value;
try {
mergeFields();
mergeTerms();
- mergeNorms();
+ value = mergeNorms();
} finally {
for (int i = 0; i < readers.size(); i++) { // close readers
- SegmentReader reader = (SegmentReader)readers.elementAt(i);
- reader.close();
+ IndexReader reader = (IndexReader)readers.elementAt(i);
+ reader.close();
}
}
if (useCompoundFile)
createCompoundFile();
+
+ return value;
}
private final void createCompoundFile()
@@ -149,8 +152,9 @@
private final void mergeFields() throws IOException {
fieldInfos = new FieldInfos(); // merge field names
for (int i = 0; i < readers.size(); i++) {
- SegmentReader reader = (SegmentReader)readers.elementAt(i);
- fieldInfos.add(reader.fieldInfos);
+ IndexReader reader = (IndexReader)readers.elementAt(i);
+ fieldInfos.add(reader.getFieldNames(true), true);
+ fieldInfos.add(reader.getFieldNames(false), false);
}
fieldInfos.write(directory, segment + ".fnm");
@@ -158,12 +162,11 @@
new FieldsWriter(directory, segment, fieldInfos);
try {
for (int i = 0; i < readers.size(); i++) {
- SegmentReader reader = (SegmentReader)readers.elementAt(i);
- BitVector deletedDocs = reader.deletedDocs;
- int maxDoc = reader.maxDoc();
- for (int j = 0; j < maxDoc; j++)
- if (deletedDocs == null || !deletedDocs.get(j)) // skip deleted docs
- fieldsWriter.addDocument(reader.document(j));
+ IndexReader reader = (IndexReader)readers.elementAt(i);
+ int maxDoc = reader.maxDoc();
+ for (int j = 0; j < maxDoc; j++)
+ if (!reader.isDeleted(j)) // skip deleted docs
+ fieldsWriter.addDocument(reader.document(j));
}
} finally {
fieldsWriter.close();
@@ -196,8 +199,8 @@
queue = new SegmentMergeQueue(readers.size());
int base = 0;
for (int i = 0; i < readers.size(); i++) {
- SegmentReader reader = (SegmentReader)readers.elementAt(i);
- SegmentTermEnum termEnum = (SegmentTermEnum)reader.terms();
+ IndexReader reader = (IndexReader)readers.elementAt(i);
+ TermEnum termEnum = reader.terms();
SegmentMergeInfo smi = new SegmentMergeInfo(base, termEnum, reader);
base += reader.numDocs();
if (smi.next())
@@ -246,42 +249,40 @@
termInfosWriter.add(smis[0].term, termInfo);
}
}
-
+
private final int appendPostings(SegmentMergeInfo[] smis, int n)
throws IOException {
int lastDoc = 0;
int df = 0; // number of docs w/ term
for (int i = 0; i < n; i++) {
SegmentMergeInfo smi = smis[i];
- SegmentTermPositions postings = smi.postings;
+ TermPositions postings = smi.postings;
int base = smi.base;
int[] docMap = smi.docMap;
- smi.termEnum.termInfo(termInfo);
- postings.seek(termInfo);
+ postings.seek(smi.termEnum);
while (postings.next()) {
- int doc;
- if (docMap == null)
- doc = base + postings.doc; // no deletions
- else
- doc = base + docMap[postings.doc]; // re-map around deletions
+ int doc = postings.doc();
+ if (docMap != null)
+ doc = docMap[doc]; // map around deletions
+ doc += base; // convert to merged space
if (doc < lastDoc)
throw new IllegalStateException("docs out of order");
int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1
lastDoc = doc;
-
- int freq = postings.freq;
+
+ int freq = postings.freq();
if (freq == 1) {
freqOutput.writeVInt(docCode | 1); // write doc & freq=1
} else {
freqOutput.writeVInt(docCode); // write doc
freqOutput.writeVInt(freq); // write frequency in doc
}
-
+
int lastPosition = 0; // write position deltas
- for (int j = 0; j < freq; j++) {
- int position = postings.nextPosition();
+ for (int j = 0; j < freq; j++) {
+ int position = postings.nextPosition();
proxOutput.writeVInt(position - lastPosition);
lastPosition = position;
}
@@ -291,33 +292,31 @@
}
return df;
}
-
- private final void mergeNorms() throws IOException {
+ private final int mergeNorms() throws IOException {
+ int docCount = 0;
for (int i = 0; i < fieldInfos.size(); i++) {
FieldInfo fi = fieldInfos.fieldInfo(i);
if (fi.isIndexed) {
- OutputStream output = directory.createFile(segment + ".f" + i);
- try {
- for (int j = 0; j < readers.size(); j++) {
- SegmentReader reader = (SegmentReader)readers.elementAt(j);
- BitVector deletedDocs = reader.deletedDocs;
- InputStream input = reader.normStream(fi.name);
+ OutputStream output = directory.createFile(segment + ".f" + i);
+ try {
+ for (int j = 0; j < readers.size(); j++) {
+ IndexReader reader = (IndexReader)readers.elementAt(j);
+ byte[] input = reader.norms(fi.name);
int maxDoc = reader.maxDoc();
- try {
- for (int k = 0; k < maxDoc; k++) {
- byte norm = input != null ? input.readByte() : (byte)0;
- if (deletedDocs == null || !deletedDocs.get(k))
- output.writeByte(norm);
+ for (int k = 0; k < maxDoc; k++) {
+ byte norm = input != null ? input[k] : (byte)0;
+ if (!reader.isDeleted(k)) {
+ output.writeByte(norm);
+ docCount++;
}
- } finally {
- if (input != null)
- input.close();
- }
- }
- } finally {
- output.close();
- }
+ }
+ }
+ } finally {
+ output.close();
+ }
}
}
+ return docCount;
}
+
}
Index: src/java/org/apache/lucene/index/SegmentReader.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java,v
retrieving revision 1.13
diff -u -u -r1.13 SegmentReader.java
--- src/java/org/apache/lucene/index/SegmentReader.java 17 Oct 2003 10:49:42 -0000 1.13
+++ src/java/org/apache/lucene/index/SegmentReader.java 17 Oct 2003 18:51:19 -0000
@@ -110,9 +110,9 @@
segment = si.name;
// Use compound file directory for some files, if it exists
- Directory cfsDir = directory;
- if (directory.fileExists(segment + ".cfs")) {
- cfsReader = new CompoundFileReader(directory, segment + ".cfs");
+ Directory cfsDir = directory();
+ if (directory().fileExists(segment + ".cfs")) {
+ cfsReader = new CompoundFileReader(directory(), segment + ".cfs");
cfsDir = cfsReader;
}
@@ -124,7 +124,7 @@
// NOTE: the bitvector is stored using the regular directory, not cfs
if (hasDeletions(si))
- deletedDocs = new BitVector(directory, segment + ".del");
+ deletedDocs = new BitVector(directory(), segment + ".del");
// make sure that all index files have been read or are kept open
// so that if an index update removes them we'll still have them
@@ -133,16 +133,15 @@
openNorms(cfsDir);
}
-
- final synchronized void doClose() throws IOException {
+ protected final synchronized void doClose() throws IOException {
if (deletedDocsDirty) {
- synchronized (directory) { // in- & inter-process sync
- new Lock.With(directory.makeLock(IndexWriter.COMMIT_LOCK_NAME),
+ synchronized (directory()) { // in- & inter-process sync
+ new Lock.With(directory().makeLock(IndexWriter.COMMIT_LOCK_NAME),
IndexWriter.COMMIT_LOCK_TIMEOUT) {
public Object doBody() throws IOException {
- deletedDocs.write(directory, segment + ".tmp");
- directory.renameFile(segment + ".tmp", segment + ".del");
- directory.touchFile("segments");
+ deletedDocs.write(directory(), segment + ".tmp");
+ directory().renameFile(segment + ".tmp", segment + ".del");
+ directory().touchFile("segments");
return null;
}
}.run();
@@ -164,18 +163,22 @@
cfsReader.close();
if (closeDirectory)
- directory.close();
+ directory().close();
}
static final boolean hasDeletions(SegmentInfo si) throws IOException {
return si.dir.fileExists(si.name + ".del");
}
+ public boolean hasDeletions() {
+ return deletedDocs != null;
+ }
+
static final boolean usesCompoundFile(SegmentInfo si) throws IOException {
return si.dir.fileExists(si.name + ".cfs");
}
- final synchronized void doDelete(int docNum) throws IOException {
+ protected final synchronized void doDelete(int docNum) throws IOException {
if (deletedDocs == null)
deletedDocs = new BitVector(maxDoc());
deletedDocsDirty = true;
@@ -190,7 +193,7 @@
for (int i=0; i<ext.length; i++) {
String name = segment + "." + ext[i];
- if (directory.fileExists(name))
+ if (directory().fileExists(name))
files.addElement(name);
}
Index: src/java/org/apache/lucene/index/SegmentTermDocs.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermDocs.java,v
retrieving revision 1.2
diff -u -u -r1.2 SegmentTermDocs.java
--- src/java/org/apache/lucene/index/SegmentTermDocs.java 21 Jan 2002 17:07:23 -0000 1.2
+++ src/java/org/apache/lucene/index/SegmentTermDocs.java 17 Oct 2003 18:51:20 -0000
@@ -78,6 +78,15 @@
seek(ti);
}
+ public void seek(TermEnum enum) throws IOException {
+ TermInfo ti;
+ if (enum instanceof SegmentTermEnum) // optimized case
+ ti = ((SegmentTermEnum)enum).termInfo();
+ else // punt case
+ ti = parent.tis.get(enum.term());
+ seek(ti);
+ }
+
void seek(TermInfo ti) throws IOException {
if (ti == null) {
freqCount = 0;
Index: src/java/org/apache/lucene/index/SegmentTermPositions.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentTermPositions.java,v
retrieving revision 1.3
diff -u -u -r1.3 SegmentTermPositions.java
--- src/java/org/apache/lucene/index/SegmentTermPositions.java 8 Feb 2002 22:52:03 -0000 1.3
+++ src/java/org/apache/lucene/index/SegmentTermPositions.java 17 Oct 2003 18:51:20 -0000
@@ -106,6 +106,7 @@
public final int read(final int[] docs, final int[] freqs)
throws IOException {
- throw new RuntimeException();
+ throw new UnsupportedOperationException();
}
+
}
Index: src/java/org/apache/lucene/index/SegmentsReader.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentsReader.java,v
retrieving revision 1.12
diff -u -u -r1.12 SegmentsReader.java
--- src/java/org/apache/lucene/index/SegmentsReader.java 10 Sep 2003 14:27:37 -0000 1.12
+++ src/java/org/apache/lucene/index/SegmentsReader.java 17 Oct 2003 18:51:23 -0000
@@ -76,6 +76,7 @@
private Hashtable normsCache = new Hashtable();
private int maxDoc = 0;
private int numDocs = -1;
+ private boolean hasDeletions = false;
SegmentsReader(Directory directory, SegmentReader[] r) throws IOException {
super(directory);
@@ -84,6 +85,9 @@
for (int i = 0; i < readers.length; i++) {
starts[i] = maxDoc;
maxDoc += readers[i].maxDoc(); // compute maxDocs
+
+ if (readers[i].hasDeletions())
+ hasDeletions = true;
}
starts[readers.length] = maxDoc;
}
@@ -112,10 +116,13 @@
return readers[i].isDeleted(n - starts[i]); // dispatch to segment reader
}
- final synchronized void doDelete(int n) throws IOException {
+ public boolean hasDeletions() { return hasDeletions; }
+
+ protected final synchronized void doDelete(int n) throws IOException {
numDocs = -1; // invalidate cache
int i = readerIndex(n); // find segment num
readers[i].doDelete(n - starts[i]); // dispatch to segment reader
+ hasDeletions = true;
}
private final int readerIndex(int n) { // find reader for doc n:
@@ -174,7 +181,7 @@
return new SegmentsTermPositions(readers, starts);
}
- final synchronized void doClose() throws IOException {
+ protected final synchronized void doClose() throws IOException {
for (int i = 0; i < readers.length; i++)
readers[i].close();
}
@@ -309,6 +316,10 @@
this.current = null;
}
+ public void seek(TermEnum termEnum) throws IOException {
+ seek(termEnum.term());
+ }
+
public final boolean next() throws IOException {
if (current != null && current.next()) {
return true;
@@ -389,4 +400,5 @@
public final int nextPosition() throws IOException {
return ((SegmentTermPositions)current).nextPosition();
}
+
}
Index: src/java/org/apache/lucene/index/TermDocs.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/TermDocs.java,v
retrieving revision 1.4
diff -u -u -r1.4 TermDocs.java
--- src/java/org/apache/lucene/index/TermDocs.java 29 Jan 2003 17:18:54 -0000 1.4
+++ src/java/org/apache/lucene/index/TermDocs.java 17 Oct 2003 18:51:23 -0000
@@ -71,6 +71,11 @@
*/
void seek(Term term) throws IOException;
+ /** Sets this to the data for the current term in a [EMAIL PROTECTED] TermEnum}.
+ * This may be optimized in some implementations.
+ */
+ void seek(TermEnum termEnum) throws IOException;
+
+ /** Returns the current document number. <p> This is invalid until [EMAIL PROTECTED] #next()} is called for the first time.*/
int doc();
Index: src/test/org/apache/lucene/index/TestFilterIndexReader.java
===================================================================
RCS file: src/test/org/apache/lucene/index/TestFilterIndexReader.java
diff -N src/test/org/apache/lucene/index/TestFilterIndexReader.java
--- /dev/null 1 Jan 1970 00:00:00 -0000
+++ src/test/org/apache/lucene/index/TestFilterIndexReader.java 17 Oct 2003 18:51:23 -0000
@@ -0,0 +1,175 @@
+package org.apache.lucene.index;
+
+/* ====================================================================
+ * The Apache Software License, Version 1.1
+ *
+ * Copyright (c) 2001, 2002, 2003 The Apache Software Foundation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ *
+ * 3. The end-user documentation included with the redistribution,
+ * if any, must include the following acknowledgment:
+ * "This product includes software developed by the
+ * Apache Software Foundation (http://www.apache.org/)."
+ * Alternately, this acknowledgment may appear in the software itself,
+ * if and wherever such third-party acknowledgments normally appear.
+ *
+ * 4. The names "Apache" and "Apache Software Foundation" and
+ * "Apache Lucene" must not be used to endorse or promote products
+ * derived from this software without prior written permission. For
+ * written permission, please contact [EMAIL PROTECTED]
+ *
+ * 5. Products derived from this software may not be called "Apache",
+ * "Apache Lucene", nor may "Apache" appear in their name, without
+ * prior written permission of the Apache Software Foundation.
+ *
+ * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
+ * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
+ * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
+ * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ * ====================================================================
+ *
+ * This software consists of voluntary contributions made by many
+ * individuals on behalf of the Apache Software Foundation. For more
+ * information on the Apache Software Foundation, please see
+ * <http://www.apache.org/>.
+ */
+
+
+import junit.framework.TestCase;
+import junit.framework.TestSuite;
+import junit.textui.TestRunner;
+import junit.framework.TestResult;
+
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.RAMDirectory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+
+import java.util.Collection;
+import java.io.IOException;
+
+public class TestFilterIndexReader extends TestCase {
+
+ private static class TestReader extends FilterIndexReader {
+
+ /** Filter that only permits terms containing 'e'.*/
+ private static class TestTermEnum extends FilterTermEnum {
+ public TestTermEnum(TermEnum enum)
+ throws IOException {
+ super(enum);
+ }
+
+ /** Scan for terms containing the letter 'e'.*/
+ public boolean next() throws IOException {
+ while (in.next()) {
+ if (in.term().text().indexOf('e') != -1)
+ return true;
+ }
+ return false;
+ }
+ }
+
+ /** Filter that only returns odd numbered documents. */
+ private static class TestTermPositions extends FilterTermPositions {
+ public TestTermPositions(TermPositions in)
+ throws IOException {
+ super(in);
+ }
+
+ /** Scan for odd numbered documents. */
+ public boolean next() throws IOException {
+ while (in.next()) {
+ if ((in.doc() % 2) == 1)
+ return true;
+ }
+ return false;
+ }
+ }
+
+ public TestReader(IndexReader reader) {
+ super(reader);
+ }
+
+ /** Filter terms with TestTermEnum. */
+ public TermEnum terms() throws IOException {
+ return new TestTermEnum(in.terms());
+ }
+
+ /** Filter positions with TestTermPositions. */
+ public TermPositions termPositions() throws IOException {
+ return new TestTermPositions(in.termPositions());
+ }
+ }
+
+
+ /** Main for running test case by itself. */
+ public static void main(String args[]) {
+ TestRunner.run (new TestSuite(TestIndexReader.class));
+ }
+
+ /**
+ * Tests the IndexReader.getFieldNames implementation
+ * @throws Exception on error
+ */
+ public void testFilterIndexReader() throws Exception {
+ RAMDirectory directory = new RAMDirectory();
+ IndexWriter writer =
+ new IndexWriter(directory, new WhitespaceAnalyzer(), true);
+
+ Document d1 = new Document();
+ d1.add(Field.Text("default","one two"));
+ writer.addDocument(d1);
+
+ Document d2 = new Document();
+ d2.add(Field.Text("default","one three"));
+ writer.addDocument(d2);
+
+ Document d3 = new Document();
+ d3.add(Field.Text("default","two four"));
+ writer.addDocument(d3);
+
+ writer.close();
+
+ IndexReader reader = new TestReader(IndexReader.open(directory));
+
+ TermEnum terms = reader.terms();
+ while (terms.next()) {
+ assertTrue(terms.term().text().indexOf('e') != -1);
+ }
+ terms.close();
+
+ TermPositions positions = reader.termPositions(new Term("default", "one"));
+ while (positions.next()) {
+ assertTrue((positions.doc() % 2) == 1);
+ }
+
+ reader.close();
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]
