Author: cutting
Date: Mon Mar 7 12:28:04 2005
New Revision: 156444
URL: http://svn.apache.org/viewcvs?view=rev&rev=156444
Log:
Read term index lazily at first random-access to terms.
Modified:
lucene/java/trunk/CHANGES.txt
lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosReader.java
Modified: lucene/java/trunk/CHANGES.txt
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/CHANGES.txt?view=diff&r1=156443&r2=156444
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Mon Mar 7 12:28:04 2005
@@ -159,6 +159,13 @@
7. Use uncached access to norms when merging to reduce RAM usage.
(Bug #32847). (Doug Cutting)
+ 8. Don't read term index when random-access is not required. This
+ reduces the time to open IndexReaders and the memory they use when
+ random access is not required, e.g., when merging segments. The
+ term index is now read into memory lazily at the first
+ random-access. (Doug Cutting)
+
+
Infrastructure
1. Lucene's source code repository has converted from CVS to
Modified:
lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosReader.java
URL:
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosReader.java?view=diff&r1=156443&r2=156444
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosReader.java
(original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosReader.java Mon
Mar 7 12:28:04 2005
@@ -33,6 +33,12 @@
private SegmentTermEnum origEnum;
private long size;
+ private Term[] indexTerms = null;
+ private TermInfo[] indexInfos;
+ private long[] indexPointers;
+
+ private SegmentTermEnum indexEnum;
+
TermInfosReader(Directory dir, String seg, FieldInfos fis)
throws IOException {
directory = dir;
@@ -42,7 +48,10 @@
origEnum = new SegmentTermEnum(directory.openInput(segment + ".tis"),
fieldInfos, false);
size = origEnum.size;
- readIndex();
+
+ indexEnum =
+ new SegmentTermEnum(directory.openInput(segment + ".tii"),
+ fieldInfos, true);
}
protected void finalize() {
@@ -73,28 +82,24 @@
return termEnum;
}
- Term[] indexTerms = null;
- TermInfo[] indexInfos;
- long[] indexPointers;
-
- private final void readIndex() throws IOException {
- SegmentTermEnum indexEnum =
- new SegmentTermEnum(directory.openInput(segment + ".tii"),
- fieldInfos, true);
+ private final void ensureIndexIsRead() throws IOException {
+ if (indexTerms != null) // index already read
+ return; // do nothing
try {
- int indexSize = (int)indexEnum.size;
+ int indexSize = (int)indexEnum.size; // otherwise read index
indexTerms = new Term[indexSize];
indexInfos = new TermInfo[indexSize];
indexPointers = new long[indexSize];
-
+
for (int i = 0; indexEnum.next(); i++) {
- indexTerms[i] = indexEnum.term();
- indexInfos[i] = indexEnum.termInfo();
- indexPointers[i] = indexEnum.indexPointer;
+ indexTerms[i] = indexEnum.term();
+ indexInfos[i] = indexEnum.termInfo();
+ indexPointers[i] = indexEnum.indexPointer;
}
} finally {
- indexEnum.close();
+ indexEnum.close();
+ indexEnum = null;
}
}
@@ -126,6 +131,8 @@
TermInfo get(Term term) throws IOException {
if (size == 0) return null;
+ ensureIndexIsRead();
+
// optimize sequential access: first try scanning cached enum w/o seeking
SegmentTermEnum enumerator = getEnum();
if (enumerator.term() != null // term is at or past current
@@ -179,6 +186,7 @@
final long getPosition(Term term) throws IOException {
if (size == 0) return -1;
+ ensureIndexIsRead();
int indexOffset = getIndexOffset(term);
seekEnum(indexOffset);