Author: tilman
Date: Mon Dec  1 10:30:59 2025
New Revision: 1930160

Log:
PDFBOX-5660: optimize, as suggested by Valery Bokov, closes #357

Modified:
   pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java

Modified: pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
==============================================================================
--- pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java   Mon Dec  1 10:30:55 2025        (r1930159)
+++ pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java   Mon Dec  1 10:30:59 2025        (r1930160)
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.nio.charset.StandardCharsets;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.regex.Pattern;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.pdfbox.contentstream.PDContentStream;
@@ -46,6 +47,7 @@ public class PDFStreamParser extends Bas
      */
     private static final Log LOG = LogFactory.getLog(PDFStreamParser.class);
 
+    private static final Pattern NUMBER_PATTERN = Pattern.compile("^\\d*\\.?\\d*$");
     private static final int MAX_BIN_CHAR_TEST_LENGTH = 10;
     private final byte[] binCharTestArr = new byte[MAX_BIN_CHAR_TEST_LENGTH];
     private int inlineImageDepth = 0;
@@ -387,7 +389,7 @@ public class PDFStreamParser extends Bas
                 // or a number (PDFBOX-5957)
                 s = new String(binCharTestArr, startOpIdx, endOpIdx - startOpIdx, StandardCharsets.US_ASCII);
                 if (!"Q".equals(s) && !"EMC".equals(s) && !"S".equals(s) &&
-                    !s.matches("^\\d*\\.?\\d*$"))
+                    !NUMBER_PATTERN.matcher(s).find())
                 {
                     // operator is not Q, not EMC, not S, nor a number -> assume binary data
                     noBinData = false;
@@ -403,7 +405,7 @@ public class PDFStreamParser extends Bas
                     s = new String(binCharTestArr, startOpIdx, endOpIdx - startOpIdx, StandardCharsets.US_ASCII);
                 }
                 // look for token of 3 chars max or a number
-                if (endOpIdx - startOpIdx > 3 && !s.matches("^\\d*\\.?\\d*$"))
+                if (endOpIdx - startOpIdx > 3 && !NUMBER_PATTERN.matcher(s).find())
                 {
                     noBinData = false; // "operator" too long, assume binary data
                 }

Reply via email to