Author: msahyoun
Date: Fri Mar 20 11:47:45 2026
New Revision: 1932405
Log:
PDFBOX-6178, PDFBOX-4076: handle #00 as valid; see testfile by Acrobat
Modified:
pdfbox/trunk/pdfbox/pom.xml
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java
Modified: pdfbox/trunk/pdfbox/pom.xml
==============================================================================
--- pdfbox/trunk/pdfbox/pom.xml Fri Mar 20 10:57:21 2026 (r1932404)
+++ pdfbox/trunk/pdfbox/pom.xml Fri Mar 20 11:47:45 2026 (r1932405)
@@ -933,6 +933,19 @@
<sha512>d39486af0614bd099167a6adaab833aed41a0ebec7b85b13b382a2fdb6fddbcaaea9ab26ed0a81b72822258c8dd66dd535fa5c76afd1e5a8b1bff7d81e890274</sha512>
</configuration>
</execution>
+ <execution>
+ <id>PDFBOX-6178-1</id>
+ <phase>generate-test-resources</phase>
+ <goals>
+ <goal>wget</goal>
+ </goals>
+ <configuration>
+
<url>https://issues.apache.org/jira/secure/attachment/13081311/form_selected_ASCII_NUL_acrobat.pdf</url>
+
<outputDirectory>${project.build.directory}/pdfs</outputDirectory>
+ <outputFileName>PDFBOX-6178-1.pdf</outputFileName>
+
<sha512>83bc557e6f7d3e98de6e81168b2e2fb3def5025cc5fab1ddb3ef658505351c615253587f09b93503bb73a6225f3d3898be894e9d05dba8d464f4dd9c54514bc3</sha512>
+ </configuration>
+ </execution>
</executions>
</plugin>
</plugins>
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java
Fri Mar 20 10:57:21 2026 (r1932404)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java
Fri Mar 20 11:47:45 2026 (r1932405)
@@ -715,15 +715,6 @@ public final class COSName extends COSBa
*/
public static COSName getPDFName(byte[] bytes)
{
- for (byte b : bytes)
- {
- if (b == 0)
- {
- throw new IllegalArgumentException(
- "PDF name bytes must not contain null (0x00)
characters");
- }
- }
-
// Wrap for lookup only to avoid unnecessary copying of the byte array
for the key.
ByteBuffer lookupKey = ByteBuffer.wrap(bytes);
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
==============================================================================
---
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Fri Mar 20 10:57:21 2026 (r1932404)
+++
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
Fri Mar 20 11:47:45 2026 (r1932405)
@@ -1852,19 +1852,7 @@ public class COSParser extends BaseParse
String hex = Character.toString((char) ch1) + (char) ch2;
try
{
- // Beginning with PDF 1.2 a name object is an atomic
symbol uniquely defined by a
- // sequence of any characters (8-bit values) except
null (character code 0)
- // Although not explicitly stated in the PDF
specification, it is generally accepted that
- // the # escape sequence is used to represent
characters that are not allowed in a name object,
- // such as the null byte (0x00). Therefore, we will
throw an IOException if we encounter a #00 sequence
- // in a name object, as this would indicate an invalid
name according to the PDF specification.
- // ASCII NUL (0x00) is already handled in
BaseParser#isEndOfName
- int decoded = Integer.parseInt(hex, 16);
- if (decoded == 0)
- {
- throw new IOException("PDF name must not contain
null byte (0x00), found #00 at offset " + source.getPosition());
- }
- buffer.write(decoded);
+ buffer.write(Integer.parseInt(hex, 16));
}
catch (NumberFormatException e)
{
Modified:
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java
Fri Mar 20 10:57:21 2026 (r1932404)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java
Fri Mar 20 11:47:45 2026 (r1932405)
@@ -17,7 +17,6 @@
package org.apache.pdfbox.cos;
import static org.junit.jupiter.api.Assertions.assertEquals;
-import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.ByteArrayOutputStream;
@@ -34,16 +33,6 @@ class TestCOSName
{
private static final File TARGETPDFDIR = new File("target/pdfs");
- @Test
- void testNullByteRejection()
- {
- // Null bytes should not be allowed in name bytes
- byte[] bytesWithNull = new byte[] { 'N', 'a', 'm', 'e', 0, 'X' };
- assertThrows(IllegalArgumentException.class, () -> {
- COSName.getPDFName(bytesWithNull);
- }, "getPDFName should reject bytes containing null (0x00)");
- }
-
/**
* PDFBOX-4076: Check that characters outside of US_ASCII are not replaced
with "?".
*
@@ -72,7 +61,7 @@ class TestCOSName
}
/**
- * PDFBOX-4076: Check that characters outside of US_ASCII are not replaced
with "?".
+ * PDFBOX-6178: Ensure that names with escape sequences #xx are written as
is.
*
* @throws IOException
*/
@@ -103,4 +92,35 @@ class TestCOSName
System.out.println(writtenKeys);
}
}
+
+    /**
+     * PDFBOX-6178: Ensure that a name containing an ASCII NUL byte is written with the #00 escape sequence.
+     *
+     * @throws IOException if the test file cannot be loaded.
+     */
+    @Test
+    void NameWithASCII_NUL() throws IOException
+    {
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        // Acrobat-produced test file (per the commit log); the build downloads it to target/pdfs.
+        try (PDDocument document = Loader.loadPDF(new File(TARGETPDFDIR, "PDFBOX-6178-1.pdf"))) {
+            PDField field = document.getDocumentCatalog()
+                    .getAcroForm(null)
+                    .getField("Geschlecht");
+
+            field.getWidgets()
+                    .get(0).getAppearance().getNormalAppearance().getCOSObject()
+                    .keySet().forEach(k -> {
+                        try {
+                            k.writePDF(baos);
+                        } catch (IOException e) {
+                            throw new AssertionError("Writing name " + k + " failed", e);
+                        }
+                    });
+
+            String writtenKeys = new String(baos.toByteArray(), "UTF-8");
+            assertTrue(writtenKeys.contains("/m#00nnlich"), "Output should be /m#00nnlich (with 0x00 as hex escape)");
+            System.out.println(writtenKeys);
+        }
+    }
}