Author: tilman
Date: Sat Feb 15 10:26:24 2025
New Revision: 1923832

URL: http://svn.apache.org/viewvc?rev=1923832&view=rev
Log:
PDFBOX-5957: refactor, add comments, improve logging

Modified:
    pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1923832&r1=1923831&r2=1923832&view=diff
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java (original)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Sat Feb 15 10:26:24 2025
@@ -333,7 +333,10 @@ public class PDFStreamParser extends Bas
         boolean noBinData = true;
         int startOpIdx = -1;
         int endOpIdx = -1;
-        
+        String s = "";
+
+        LOG.debug("String after EI: '{}'", new String(binCharTestArr));
+
         if (readBytes > 0)
         {
             for (int bIdx = 0; bIdx < readBytes; bIdx++)
@@ -360,25 +363,28 @@ public class PDFStreamParser extends Bas
             // PDFBOX-3742: just assuming that 1-3 non blanks is a PDF operator isn't enough
             if (endOpIdx != -1 && startOpIdx != -1)
             {
-                // usually, the operator here is Q, sometimes EMC (PDFBOX-2376), S (PDFBOX-3784).
-                String s = new String(binCharTestArr, startOpIdx, endOpIdx - startOpIdx);
+                // usually, the operator here is Q, sometimes EMC (PDFBOX-2376), S (PDFBOX-3784)
+                s = new String(binCharTestArr, startOpIdx, endOpIdx - startOpIdx);
                 if (!"Q".equals(s) && !"EMC".equals(s) && !"S".equals(s))
                 {
+                    // operator is not Q, not EMC, not S -> assume binary data
                     noBinData = false;
                 }
             }
 
-            // only if not close to eof
-            if (readBytes == MAX_BIN_CHAR_TEST_LENGTH) 
+            // only if not close to EOF
+            if (startOpIdx != -1 && readBytes == MAX_BIN_CHAR_TEST_LENGTH) 
             {
-                // a PDF operator is 1-3 bytes long
-                if (startOpIdx != -1 && endOpIdx == -1)
+                if (endOpIdx == -1)
                 {
                     endOpIdx = MAX_BIN_CHAR_TEST_LENGTH;
+                    s = new String(binCharTestArr, startOpIdx, endOpIdx - startOpIdx);
                 }
-                if (endOpIdx != -1 && startOpIdx != -1 && endOpIdx - startOpIdx > 3)
+                LOG.debug("startOpIdx: {} endOpIdx: {} s = {}", startOpIdx, endOpIdx, s);
+                // a PDF operator is 1-3 bytes long
+                if (endOpIdx - startOpIdx > 3)
                 {
-                    noBinData = false;
+                    noBinData = false; // "operator" too long, assume binary data
                 }
             }
             source.rewind(readBytes);
@@ -386,8 +392,8 @@ public class PDFStreamParser extends Bas
         if (!noBinData)
         {
             LOG.warn(
-                    "ignoring 'EI' assumed to be in the middle of inline image at stream offset {}",
-                    source.getPosition());
+                    "ignoring 'EI' assumed to be in the middle of inline image at stream offset {}, s = '{}'",
+                    source.getPosition(), s);
         }
         return noBinData;
     }


Reply via email to