Author: cutting
Date: Wed Mar  9 10:58:26 2005
New Revision: 156669

URL: http://svn.apache.org/viewcvs?view=rev&rev=156669
Log:
Added IndexWriter.setTermIndexInterval().

Modified:
    lucene/java/trunk/CHANGES.txt
    lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
    lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java
    lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java

Modified: lucene/java/trunk/CHANGES.txt
URL: 
http://svn.apache.org/viewcvs/lucene/java/trunk/CHANGES.txt?view=diff&r1=156668&r2=156669
==============================================================================
--- lucene/java/trunk/CHANGES.txt (original)
+++ lucene/java/trunk/CHANGES.txt Wed Mar  9 10:58:26 2005
@@ -78,6 +78,10 @@
     compound index file.
     (adapted from code contributed by Garrett Rooney; committed by Bernhard)
     
+13. Add IndexWriter.setTermIndexInterval() method.  See javadocs.
+    (Doug Cutting)
+
+    
 API Changes
 
  1. Several methods and fields have been deprecated. The API documentation 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java
URL: 
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java?view=diff&r1=156668&r2=156669
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java Wed Mar  9 10:58:26 2005
@@ -39,10 +39,11 @@
   private Similarity similarity;
   private FieldInfos fieldInfos;
   private int maxFieldLength;
+  private int termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;
   private PrintStream infoStream;
 
-  /**
-   * 
+  /** This ctor used by test code only.
+   *
    * @param directory The directory to write the document information to
    * @param analyzer The analyzer to use for the document
    * @param similarity The Similarity function
@@ -56,6 +57,14 @@
     this.maxFieldLength = maxFieldLength;
   }
 
+  DocumentWriter(Directory directory, Analyzer analyzer, IndexWriter writer) {
+    this.directory = directory;
+    this.analyzer = analyzer;
+    this.similarity = writer.getSimilarity();
+    this.maxFieldLength = writer.getMaxFieldLength();
+    this.termIndexInterval = writer.getTermIndexInterval();
+  }
+
   final void addDocument(String segment, Document doc)
           throws IOException {
     // write field names
@@ -295,7 +304,8 @@
       //open files for inverse index storage
       freq = directory.createOutput(segment + ".frq");
       prox = directory.createOutput(segment + ".prx");
-      tis = new TermInfosWriter(directory, segment, fieldInfos);
+      tis = new TermInfosWriter(directory, segment, fieldInfos,
+                                termIndexInterval);
       TermInfo ti = new TermInfo();
       String currentField = null;
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java
URL: 
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java?view=diff&r1=156668&r2=156669
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexWriter.java Wed Mar  9 10:58:26 2005
@@ -103,6 +103,16 @@
       "10000"));
 
 
+  /** The default value for {@link #getTermIndexInterval()}.  This is
+   * determined by the <code>org.apache.lucene.termIndexInterval</code> system
+   * property.  The default is 128.
+   */
+  public static final int DEFAULT_TERM_INDEX_INTERVAL =
+    Integer.parseInt(System.getProperty("org.apache.lucene.termIndexInterval",
+                                        "128"));
+
+
+
   private Directory directory;  // where this index resides
   private Analyzer analyzer;    // how to analyze text
 
@@ -113,6 +123,8 @@
 
   private Lock writeLock;
 
+  private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL;
+
   /** Use compound file setting. Defaults to true, minimizing the number of
    * files used.  Setting this to false may improve indexing performance, but
    * may also cause file handle problems.
@@ -154,6 +166,26 @@
     return this.similarity;
   }
 
+  /** Expert: Set the interval between indexed terms.  Large values cause less
+   * memory to be used by IndexReader, but slow random-access to terms.  Small
+   * values cause more memory to be used by an IndexReader, and speed
+   * random-access to terms.  In particular,
+   * <code>numUniqueTerms/interval</code> terms are read into memory by an
+   * IndexReader, and, on average, <code>interval/2</code> terms must be
+   * scanned for each random term access.
+   *
+   * @see #DEFAULT_TERM_INDEX_INTERVAL
+   */
+  public void setTermIndexInterval(int interval) {
+    this.termIndexInterval = interval;
+  }
+
+  /** Expert: Return the interval between indexed terms.
+   *
+   * @see #setTermIndexInterval(int)
+   */
+  public int getTermIndexInterval() { return termIndexInterval; }
+
   /**
    * Constructs an IndexWriter for the index in <code>path</code>.
    * Text will be analyzed with <code>a</code>.  If <code>create</code>
@@ -359,6 +391,11 @@
     }
   }
 
+  /** Returns the Directory used by this index. */
+  public Directory getDirectory() {
+      return directory;
+  }
+
   /** Returns the analyzer used by this index. */
   public Analyzer getAnalyzer() {
       return analyzer;
@@ -408,7 +445,7 @@
    */
   public void addDocument(Document doc, Analyzer analyzer) throws IOException {
     DocumentWriter dw =
-      new DocumentWriter(ramDirectory, analyzer, similarity, maxFieldLength);
+      new DocumentWriter(ramDirectory, analyzer, this);
     dw.setInfoStream(infoStream);
     String segmentName = newSegmentName();
     dw.addDocument(segmentName, doc);
@@ -514,7 +551,7 @@
     optimize();                                          // start with zero or 1 seg
 
     final String mergedName = newSegmentName();
-    SegmentMerger merger = new SegmentMerger(directory, mergedName);
+    SegmentMerger merger = new SegmentMerger(this, mergedName);
 
     final Vector segmentsToDelete = new Vector();
     IndexReader sReader = null;
@@ -609,7 +646,7 @@
     final String mergedName = newSegmentName();
     if (infoStream != null) infoStream.print("merging segments");
     SegmentMerger merger =
-        new SegmentMerger(directory, mergedName);
+        new SegmentMerger(this, mergedName);
 
     final Vector segmentsToDelete = new Vector();
     for (int i = minSegment; i < segmentInfos.size(); i++) {

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java
URL: 
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java?view=diff&r1=156668&r2=156669
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java Wed Mar  9 10:58:26 2005
@@ -39,6 +39,7 @@
 final class SegmentMerger {
   private Directory directory;
   private String segment;
+  private int termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL;
 
   private Vector readers = new Vector();
   private FieldInfos fieldInfos;
@@ -51,7 +52,7 @@
     "tvx", "tvd", "tvf"
   };
 
-  /**
+  /** This ctor used only by test code.
    * 
    * @param dir The Directory to merge the other segments into
    * @param name The name of the new segment
@@ -61,6 +62,12 @@
     segment = name;
   }
 
+  SegmentMerger(IndexWriter writer, String name) {
+    directory = writer.getDirectory();
+    segment = name;
+    termIndexInterval = writer.getTermIndexInterval();
+  }
+
   /**
    * Add an IndexReader to the collection of readers that are to be merged
    * @param reader
@@ -220,7 +227,8 @@
       freqOutput = directory.createOutput(segment + ".frq");
       proxOutput = directory.createOutput(segment + ".prx");
       termInfosWriter =
-              new TermInfosWriter(directory, segment, fieldInfos);
+              new TermInfosWriter(directory, segment, fieldInfos,
+                                  termIndexInterval);
       skipInterval = termInfosWriter.skipInterval;
       queue = new SegmentMergeQueue(readers.size());
 

Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java
URL: 
http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java?view=diff&r1=156668&r2=156669
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermInfosWriter.java Wed Mar  9 10:58:26 2005
@@ -61,20 +61,22 @@
 
   private TermInfosWriter other = null;
 
-  TermInfosWriter(Directory directory, String segment, FieldInfos fis)
+  TermInfosWriter(Directory directory, String segment, FieldInfos fis,
+                  int interval)
        throws IOException {
-    initialize(directory, segment, fis, false);
-    other = new TermInfosWriter(directory, segment, fis, true);
+    initialize(directory, segment, fis, interval, false);
+    other = new TermInfosWriter(directory, segment, fis, interval, true);
     other.other = this;
   }
 
   private TermInfosWriter(Directory directory, String segment, FieldInfos fis,
-        boolean isIndex) throws IOException {
-    initialize(directory, segment, fis, isIndex);
+                          int interval, boolean isIndex) throws IOException {
+    initialize(directory, segment, fis, interval, isIndex);
   }
 
   private void initialize(Directory directory, String segment, FieldInfos fis,
-         boolean isi) throws IOException {
+                          int interval, boolean isi) throws IOException {
+    indexInterval = interval;
     fieldInfos = fis;
     isIndex = isi;
     output = directory.createOutput(segment + (isIndex ? ".tii" : ".tis"));


Reply via email to