Author: tilman
Date: Mon Dec 1 10:30:55 2025
New Revision: 1930159
Log:
PDFBOX-5660: optimize, as suggested by Valery Bokov, closes #357
Modified:
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
Modified:
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Mon Dec 1 10:30:51 2025 (r1930158)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java Mon Dec 1 10:30:55 2025 (r1930159)
@@ -20,6 +20,7 @@ import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
+import java.util.regex.Pattern;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.pdfbox.contentstream.PDContentStream;
@@ -50,6 +51,7 @@ public class PDFStreamParser extends Bas
private final List<Object> streamObjects = new ArrayList<Object>( 100 );
+ private static final Pattern NUMBER_PATTERN = Pattern.compile("^\\d*\\.?\\d*$");
private static final int MAX_BIN_CHAR_TEST_LENGTH = 10;
private final byte[] binCharTestArr = new byte[MAX_BIN_CHAR_TEST_LENGTH];
private int inlineImageDepth = 0;
@@ -426,7 +428,7 @@ public class PDFStreamParser extends Bas
// or a number (PDFBOX-5957)
s = new String(binCharTestArr, startOpIdx, endOpIdx - startOpIdx, Charsets.US_ASCII);
if (!"Q".equals(s) && !"EMC".equals(s) && !"S".equals(s) &&
- !s.matches("^\\d*\\.?\\d*$"))
+ !NUMBER_PATTERN.matcher(s).find())
{
// operator is not Q, not EMC, not S, nor a number -> assume binary data
noBinData = false;
@@ -442,7 +444,7 @@ public class PDFStreamParser extends Bas
s = new String(binCharTestArr, startOpIdx, endOpIdx - startOpIdx, Charsets.US_ASCII);
}
// look for token of 3 chars max or a number
- if (endOpIdx - startOpIdx > 3 && !s.matches("^\\d*\\.?\\d*$"))
+ if (endOpIdx - startOpIdx > 3 && !NUMBER_PATTERN.matcher(s).find())
{
noBinData = false; // "operator" too long, assume binary data
}