Author: msahyoun
Date: Fri Mar 20 10:00:05 2026
New Revision: 1932402
Log:
PDFBOX-6178, PDFBOX-4076: handle #00 as invalid
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java	Fri Mar 20 08:30:13 2026 (r1932401)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java	Fri Mar 20 10:00:05 2026 (r1932402)
@@ -1852,7 +1852,19 @@ public class COSParser extends BaseParse
String hex = Character.toString((char) ch1) + (char) ch2;
try
{
- buffer.write(Integer.parseInt(hex, 16));
+ // Beginning with PDF 1.2 a name object is an atomic symbol uniquely defined by a
+ // sequence of any characters (8-bit values) except null (character code 0)
+ // Although not explicitly stated in the PDF specification, it is generally accepted that
+ // the # escape sequence is used to represent characters that are not allowed in a name object,
+ // such as the null byte (0x00). Therefore, we will throw an IOException if we encounter a #00 sequence
+ // in a name object, as this would indicate an invalid name according to the PDF specification.
+ // ASCII NUL (0x00) is already handled in BaseParser#isEndOfName
+ int decoded = Integer.parseInt(hex, 16);
+ if (decoded == 0)
+ {
+ throw new IOException("PDF name must not contain null byte (0x00), found #00 at offset " + source.getPosition());
+ }
+ buffer.write(decoded);
}
catch (NumberFormatException e)
{