Author: lehmi
Date: Mon Sep 11 20:34:14 2017
New Revision: 1808066

URL: http://svn.apache.org/viewvc?rev=1808066&view=rev
Log:
PDFBOX-3928: optimize brute force search, minor improvements

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java?rev=1808066&r1=1808065&r2=1808066&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java Mon Sep 11 20:34:14 2017
@@ -1500,12 +1500,13 @@ public class COSParser extends BaseParse
         long lastObjectId = Long.MIN_VALUE;
         int lastGenID = Integer.MIN_VALUE;
         long lastObjOffset = Long.MIN_VALUE;
-        String objString = " obj";
-        char[] string = objString.toCharArray();
+        char[] objString = " obj".toCharArray();
+        char[] endobjString = "endobj".toCharArray();
+        boolean endobjFound = false;
         do
         {
             source.seek(currentOffset);
-            if (isString(string))
+            if (isString(objString))
             {
                 long tempOffset = currentOffset - 1;
                 source.seek(tempOffset);
@@ -1541,16 +1542,23 @@ public class COSParser extends BaseParse
                             lastObjectId = objectId;
                             lastGenID = genID;
                             lastObjOffset = tempOffset + 1;
+                            currentOffset += objString.length - 1;
+                            endobjFound = false;
                         }
                     }
                 }
             }
+            else if (isString(endobjString))
+            {
+                endobjFound = true;
+                currentOffset += endobjString.length - 1;
+            }
             currentOffset++;
         } while (currentOffset < lastEOFMarker && !source.isEOF());
-        if (lastEOFMarker < Long.MAX_VALUE && lastObjOffset > 0)
+        if ((lastEOFMarker < Long.MAX_VALUE || endobjFound) && lastObjOffset > 0)
         {
-            // if the pdf wasn't cut off in the middle the last object id has to added here
-            // so that it can't get lost as there isn't any subsequent object id
+            // if the pdf wasn't cut off in the middle or if the last object ends with a "endobj" marker
+            // the last object id has to be added here so that it can't get lost as there isn't any subsequent object id
             bfSearchCOSObjectKeyOffsets.put(new COSObjectKey(lastObjectId, lastGenID),
                     lastObjOffset);
         }
@@ -1937,10 +1945,7 @@ public class COSParser extends BaseParse
                 }
                 numberOfBytes += readMore;
             }
-            if (Arrays.equals(string, bytesRead))
-            {
-                bytesMatching = true;
-            }
+            bytesMatching = Arrays.equals(string, bytesRead);
             source.rewind(numberOfBytes);
         }
         return bytesMatching;
@@ -1962,6 +1967,7 @@ public class COSParser extends BaseParse
             if (source.read() != c)
             {
                 bytesMatching = false;
+                break;
             }
         }
         source.seek(originOffset);


Reply via email to