Author: tilman
Date: Sun Mar 19 18:27:29 2023
New Revision: 1908525

URL: http://svn.apache.org/viewvc?rev=1908525&view=rev
Log:
PDFBOX-5575: optimize LZWFilter, by Axel Howind; fix javadoc

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java
URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java?rev=1908525&r1=1908524&r2=1908525&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java 
(original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java 
Sun Mar 19 18:27:29 2023
@@ -66,18 +66,12 @@ public class LZWFilter extends Filter
             COSDictionary parameters, int index) throws IOException
     {
         COSDictionary decodeParams = getDecodeParams(parameters, index);
-        int earlyChange = decodeParams.getInt(COSName.EARLY_CHANGE, 1);
-
-        if (earlyChange != 0 && earlyChange != 1)
-        {
-            earlyChange = 1;
-        }
-
+        boolean earlyChange = decodeParams.getInt(COSName.EARLY_CHANGE, 1) != 
0;
         doLZWDecode(encoded, Predictor.wrapPredictor(decoded, decodeParams), 
earlyChange);
         return new DecodeResult(parameters);
     }
 
-    private void doLZWDecode(InputStream encoded, OutputStream decoded, int 
earlyChange) throws IOException
+    private static void doLZWDecode(InputStream encoded, OutputStream decoded, 
boolean earlyChange) throws IOException
     {
         List<byte[]> codeTable = new ArrayList<>();
         int chunk = 9;
@@ -133,7 +127,7 @@ public class LZWFilter extends Filter
         decoded.flush();
     }
 
-    private void checkIndexBounds(List<byte[]> codeTable, long index, 
MemoryCacheImageInputStream in)
+    private static void checkIndexBounds(List<byte[]> codeTable, long index, 
MemoryCacheImageInputStream in)
             throws IOException
     {
         if (index < 0)
@@ -181,7 +175,7 @@ public class LZWFilter extends Filter
                     if (newFoundCode == -1)
                     {
                         // use previous
-                        chunk = calculateChunk(codeTable.size() - 1, 1);
+                        chunk = calculateChunk(codeTable.size() - 1, true);
                         out.writeBits(foundCode, chunk);
                         // create new table entry
                         codeTable.add(inputPattern);
@@ -204,7 +198,7 @@ public class LZWFilter extends Filter
             }
             if (foundCode != -1)
             {
-                chunk = calculateChunk(codeTable.size() - 1, 1);
+                chunk = calculateChunk(codeTable.size() - 1, true);
                 out.writeBits(foundCode, chunk);
             }
             
@@ -213,7 +207,7 @@ public class LZWFilter extends Filter
             // possibly adjusted the chunk. Therefore, the encoder must behave 
as
             // if the code table had just grown and thus it must be checked it 
is
             // needed to adjust the chunk, based on an increased table size 
parameter
-            chunk = calculateChunk(codeTable.size(), 1);
+            chunk = calculateChunk(codeTable.size(), true);
             
             out.writeBits(EOD, chunk);
             
@@ -226,50 +220,47 @@ public class LZWFilter extends Filter
     }
 
     /**
-     * Find the longest matching pattern in the code table.
+     * Find a matching pattern in the code table.
      *
      * @param codeTable The LZW code table.
      * @param pattern The pattern to be searched for.
-     * @return The index of the longest matching pattern or -1 if nothing is
-     * found.
+     * @return The index of the matching pattern or -1 if nothing is found.
      */
-    private int findPatternCode(List<byte[]> codeTable, byte[] pattern)
+    private static int findPatternCode(List<byte[]> codeTable, byte[] pattern)
     {
-        int foundCode = -1;
-        int foundLen = 0;
-        for (int i = codeTable.size() - 1; i >= 0; --i)
+        // for the first 256 entries, index matches value
+        if (pattern.length == 1)
         {
-            if (i <= EOD)
-            {
-                // we're in the single byte area
-                if (foundCode != -1)
-                {
-                    // we already found pattern with size > 1
-                    return foundCode; 
-                }
-                else if (pattern.length > 1)
-                {
-                    // we won't find anything here anyway
-                    return -1;
-                }
-            }
-            byte[] tryPattern = codeTable.get(i);
-            if ((foundCode != -1 || tryPattern.length > foundLen) && 
Arrays.equals(tryPattern, pattern))
+            return pattern[0];
+        }
+
+        // no need to test the first 256 + 2 entries against longer patterns
+        for (int i = 257; i < codeTable.size(); i++)
+        {
+            if (Arrays.equals(codeTable.get(i), pattern))
             {
-                foundCode = i;
-                foundLen = tryPattern.length;
+                return i;
             }
         }
-        return foundCode;
+
+        return -1;
     }
 
     /**
-     * Init the code table with 1 byte entries and the EOD and CLEAR_TABLE
-     * markers.
+     * Init the code table with 1 byte entries and the EOD and CLEAR_TABLE 
markers.
      */
-    private List<byte[]> createCodeTable()
+    private static List<byte[]> createCodeTable()
     {
         List<byte[]> codeTable = new ArrayList<>(4096);
+        codeTable.addAll(INITIAL_CODE_TABLE);
+        return codeTable;
+    }
+
+    private static final List<byte[]> INITIAL_CODE_TABLE = 
createInitialCodeTable();
+
+    private static List<byte[]> createInitialCodeTable()
+    {
+        List<byte[]> codeTable = new ArrayList<>(258);
         for (int i = 0; i < 256; ++i)
         {
             codeTable.add(new byte[] { (byte) (i & 0xFF) });
@@ -283,21 +274,22 @@ public class LZWFilter extends Filter
      * Calculate the appropriate chunk size
      *
      * @param tabSize the size of the code table
-     * @param earlyChange 0 or 1 for early chunk increase
+     * @param earlyChange true for early chunk increase
      *
      * @return a value between 9 and 12
      */
-    private int calculateChunk(int tabSize, int earlyChange)
+    private static int calculateChunk(int tabSize, boolean earlyChange)
     {
-        if (tabSize >= 2048 - earlyChange)
+        int i = tabSize + (earlyChange ? 1 : 0);
+        if (i >= 2048)
         {
             return 12;
         }
-        if (tabSize >= 1024 - earlyChange)
+        if (i >= 1024)
         {
             return 11;
         }
-        if (tabSize >= 512 - earlyChange)
+        if (i >= 512)
         {
             return 10;
         }


Reply via email to