Hi,

Here's a patch that raises the default size of the internal buffer in BufferedRandomAccessFile and makes it configurable: the size is now read from the config file (property io.raf.buffer.size).

In my experience the old default (4096 bytes) was too low: after increasing it to 1 MB I observed a significant (~15%) performance increase in disk-intensive operations, at the cost of increased memory consumption.

If there are no objections I'd like to apply it on Monday.

--
Best regards,
Andrzej Bialecki

-------------------------------------------------
Software Architect, System Integration Specialist
CEN/ISSS EC Workshop, ECIMF project chair
EU FP6 E-Commerce Expert/Evaluator
-------------------------------------------------
FreeBSD developer (http://www.freebsd.org)


Index: conf/nutch-default.xml
===================================================================
RCS file: /cvsroot/nutch/nutch/conf/nutch-default.xml,v
retrieving revision 1.56
diff -d -u -r1.56 nutch-default.xml
--- conf/nutch-default.xml      22 Oct 2004 23:30:20 -0000      1.56
+++ conf/nutch-default.xml      22 Oct 2004 23:39:38 -0000
@@ -304,6 +304,16 @@
   should minimize seeks.</description>
 </property>
 
+<property>
+  <name>io.raf.buffer.size</name>
+  <value>1048576</value>
+  <description>The size, in bytes, of the buffer used by BufferedRandomAccessFile.
+  The size of this buffer should probably be a multiple of hardware
+  page size (4096 on Intel x86), and it determines how much data is
+  buffered during read and write operations.</description>
+</property>
+  
+
 <!-- indexer properties -->
 
 <property>
Index: src/java/net/nutch/io/BufferedRandomAccessFile.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/io/BufferedRandomAccessFile.java,v
retrieving revision 1.1
diff -d -u -r1.1 BufferedRandomAccessFile.java
--- src/java/net/nutch/io/BufferedRandomAccessFile.java 20 Aug 2004 20:36:11 -0000      1.1
+++ src/java/net/nutch/io/BufferedRandomAccessFile.java 22 Oct 2004 23:39:38 -0000
@@ -358,12 +358,13 @@
     private boolean isReadOnly;                   // if false, then writeOnly
     
     public BufferedRandomAccessFile(NutchFileSystem nfs, String file, boolean isReadOnly) throws IOException {
-        this(nfs, file, 4096, isReadOnly);
+        this(nfs, file, -1, isReadOnly);
     }
 
     public BufferedRandomAccessFile(NutchFileSystem nfs, String file, int bufLen,
                                     boolean isReadOnly) throws IOException {
         super(nfs, file, isReadOnly ? "r" : "rw");
+        if (bufLen == -1) bufLen = NutchConf.getInt("io.raf.buffer.size", 4096);
         this.buf = new byte[bufLen];
         this.isReadOnly = isReadOnly;
     }
Index: src/java/net/nutch/io/SequenceFile.java
===================================================================
RCS file: /cvsroot/nutch/nutch/src/java/net/nutch/io/SequenceFile.java,v
retrieving revision 1.26
diff -d -u -r1.26 SequenceFile.java
--- src/java/net/nutch/io/SequenceFile.java     20 Aug 2004 20:36:11 -0000      1.26
+++ src/java/net/nutch/io/SequenceFile.java     22 Oct 2004 23:39:38 -0000
@@ -130,7 +130,7 @@
 
     /** Open the named file. */
     public Reader(NutchFileSystem nfs, String file) throws IOException {
-      this(nfs, file, 4096);
+      this(nfs, file, -1);
     }
 
     private Reader(NutchFileSystem nfs, String file, int bufferSize) throws IOException {

Reply via email to