Author: tilman
Date: Sat Feb 15 11:22:04 2025
New Revision: 1923839

URL: http://svn.apache.org/viewvc?rev=1923839&view=rev
Log:
PDFBOX-5957: detect number after EI

Modified:
    
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java

Modified: 
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
URL: 
http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java?rev=1923839&r1=1923838&r2=1923839&view=diff
==============================================================================
--- 
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
 (original)
+++ 
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
 Sat Feb 15 11:22:04 2025
@@ -400,11 +400,13 @@ public class PDFStreamParser extends Bas
             // PDFBOX-3742: just assuming that 1-3 non blanks is a PDF 
operator isn't enough
             if (noBinData && endOpIdx != -1 && startOpIdx != -1)
             {
-                // usually, the operator here is Q, sometimes EMC 
(PDFBOX-2376), S (PDFBOX-3784)
+                // usually, the operator here is Q, sometimes EMC 
(PDFBOX-2376), S (PDFBOX-3784),
+                // or a number (PDFBOX-5957)
                 s = new String(binCharTestArr, startOpIdx, endOpIdx - 
startOpIdx);
-                if (!"Q".equals(s) && !"EMC".equals(s) && !"S".equals(s))
+                if (!"Q".equals(s) && !"EMC".equals(s) && !"S".equals(s) &&
+                    !s.matches("^\\d*\\.?\\d*$"))
                 {
-                    // operator is not Q, not EMC, not S -> assume binary data
+                    // operator is not Q, not EMC, not S, nor a number -> 
assume binary data
                     noBinData = false;
                 }
             }
@@ -417,8 +419,8 @@ public class PDFStreamParser extends Bas
                     endOpIdx = MAX_BIN_CHAR_TEST_LENGTH;
                     s = new String(binCharTestArr, startOpIdx, endOpIdx - 
startOpIdx);
                 }
-                // a PDF operator is 1-3 bytes long
-                if (endOpIdx - startOpIdx > 3)
+                // look for token of 3 chars max or a number
+                if (endOpIdx - startOpIdx > 3 && !s.matches("^\\d*\\.?\\d*$"))
                 {
                     noBinData = false; // "operator" too long, assume binary 
data
                 }


Reply via email to