[cp-patches] FYI: ZipFile performance improvement

Roman Kennke Tue, 15 Aug 2006 04:34:06 -0700

Here comes a significant performance improvement for ZipFile, done by Ingo. It avoids expensive UTF-8 decoding when possible (most cases, for ASCII) and optimizes readLeShort() and readLeInt() for the case when the buffer has enough bytes. Mauve shows no regressions.


2006-08-15  Ingo Proetel  <[EMAIL PROTECTED]>


        * java/util/zip/ZipFile.java
        (PartialInputStream.UTF8DECODER): New constant field, used
        for decoding UTF8 strings.
        (readLeShort): Access buffer directly if it has enough bytes
        available.
        (readLeInt): Access buffer directly if it has enough bytes
        available.
        (decodeChars): New helper method for decoding UTF8 strings.
        (readString): Avoid NIO charset decoder if possible.

/Roman

Index: java/util/zip/ZipFile.java
===================================================================
RCS file: /cvsroot/classpath/classpath/java/util/zip/ZipFile.java,v
retrieving revision 1.35
diff -u -1 -2 -r1.35 ZipFile.java
--- java/util/zip/ZipFile.java	12 Jul 2006 17:00:34 -0000	1.35
+++ java/util/zip/ZipFile.java	15 Aug 2006 11:27:08 -0000
@@ -39,24 +39,27 @@
 
 package java.util.zip;
 
 import gnu.java.util.EmptyEnumeration;
 
 import java.io.EOFException;
 import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.RandomAccessFile;
 import java.io.UnsupportedEncodingException;
+import java.nio.ByteBuffer;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
 import java.util.Enumeration;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 
 /**
  * This class represents a Zip archive.  You can ask for the contained
  * entries, or get an input stream for a file entry.  The entry is
  * automatically decompressed.
  *
  * This class is thread safe:  You can open input streams for arbitrary
  * entries in different threads.
  *
@@ -507,24 +510,30 @@
 
     public Object nextElement()
     {
       /* We return a clone, just to be safe that the user doesn't
        * change the entry.  
        */
       return ((ZipEntry)elements.next()).clone();
     }
   }
 
   private static final class PartialInputStream extends InputStream
   {
+    /**
+     * The UTF-8 decoder used for decoding the filenames.
+     */
+    private static final CharsetDecoder UTF8DECODER =
+      Charset.forName("UTF-8").newDecoder();
+
     private final RandomAccessFile raf;
     private final byte[] buffer;
     private long bufferOffset;
     private int pos;
     private long end;
     // We may need to supply an extra dummy byte to our reader.
     // See Inflater.  We use a count here to simplify the logic
     // elsewhere in this class.  Note that we ignore the dummy
     // byte in methods where we know it is not needed.
     private int dummyByteCount;
 
     public PartialInputStream(RandomAccessFile raf, int bufferSize)
@@ -643,63 +652,125 @@
       if (read(buf, 0, buf.length) != buf.length)
         throw new EOFException();
     }
 
     void readFully(byte[] buf, int off, int len) throws IOException
     {
       if (read(buf, off, len) != len)
         throw new EOFException();
     }
 
     int readLeShort() throws IOException
     {
-      int b0 = read();
-      int b1 = read();
-      if (b1 == -1)
-        throw new EOFException();
-      return (b0 & 0xff) | (b1 & 0xff) << 8;
+      int result;
+      if(pos + 1 < buffer.length)
+        {
+          result = ((buffer[pos + 0] & 0xff) | (buffer[pos + 1] & 0xff) << 8);
+          pos += 2;
+        }
+      else
+        {
+          int b0 = read();
+          int b1 = read();
+          if (b1 == -1)
+            throw new EOFException();
+          result = (b0 & 0xff) | (b1 & 0xff) << 8;
+        }
+      return result;
     }
 
     int readLeInt() throws IOException
     {
-      int b0 = read();
-      int b1 = read();
-      int b2 = read();
-      int b3 = read();
-      if (b3 == -1)
-        throw new EOFException();
-      return ((b0 & 0xff) | (b1 & 0xff) << 8)
-            | ((b2 & 0xff) | (b3 & 0xff) << 8) << 16;
+      int result;
+      if(pos + 3 < buffer.length)
+        {
+          result = (((buffer[pos + 0] & 0xff) | (buffer[pos + 1] & 0xff) << 8)
+                   | ((buffer[pos + 2] & 0xff)
+                       | (buffer[pos + 3] & 0xff) << 8) << 16);
+          pos += 4;
+        }
+      else
+        {
+          int b0 = read();
+          int b1 = read();
+          int b2 = read();
+          int b3 = read();
+          if (b3 == -1)
+            throw new EOFException();
+          result =  (((b0 & 0xff) | (b1 & 0xff) << 8) | ((b2 & 0xff)
+                    | (b3 & 0xff) << 8) << 16);
+        }
+      return result;
+    }
+
+    /**
+     * Decode chars from byte buffer using UTF8 encoding.  This
+     * operation is performance-critical since a jar file contains a
+     * large number of strings for the name of each file in the
+     * archive.  This routine therefore avoids using the expensive
+     * utf8Decoder when decoding is straightforward.
+     *
+     * @param buffer the buffer that contains the encoded character
+     *        data
+     * @param pos the index in buffer of the first byte of the encoded
+     *        data
+     * @param length the length of the encoded data in number of
+     *        bytes.
+     *
+     * @return a String that contains the decoded characters.
+     */
+    private String decodeChars(byte[] buffer, int pos, int length)
+      throws IOException
+    {
+      String result;
+      int i=length - 1;
+      while ((i >= 0) && (buffer[i] <= 0x7f))
+        {
+          i--;
+        }
+      if (i < 0)
+        {
+          result = new String(buffer, 0, pos, length);
+        }
+      else
+        {
+          ByteBuffer bufferBuffer = ByteBuffer.wrap(buffer, pos, length);
+          UTF8DECODER.reset();
+          char [] characters = UTF8DECODER.decode(bufferBuffer).array();
+          result = String.valueOf(characters);
+        }
+      return result;
     }
 
     String readString(int length) throws IOException
     {
       if (length > end - (bufferOffset + pos))
         throw new EOFException();
 
+      String result = null;
       try
         {
           if (buffer.length - pos >= length)
             {
-              String s = new String(buffer, pos, length, "UTF-8");
+              result = decodeChars(buffer, pos, length);
               pos += length;
-              return s;
             }
           else
             {
               byte[] b = new byte[length];
               readFully(b);
-              return new String(b, 0, length, "UTF-8");
+              result = decodeChars(b, 0, length);
             }
         }
       catch (UnsupportedEncodingException uee)
         {
           throw new AssertionError(uee);
         }
+      return result;
     }
 
     public void addDummyByte()
     {
       dummyByteCount = 1;
     }
   }
 }

[cp-patches] FYI: ZipFile performance improvement

Reply via email to