Author: msahyoun
Date: Fri Mar 20 10:01:30 2026
New Revision: 1932403
Log:
PDFBOX-6178, PDFBOX-4076: add tests; partially created with Claude Haiku 4.5
Modified:
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestCOSParser.java
Modified:
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java Fri Mar 20 10:00:05 2026 (r1932402)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java Fri Mar 20 10:01:30 2026 (r1932403)
@@ -17,6 +17,7 @@
package org.apache.pdfbox.cos;
import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.ByteArrayOutputStream;
@@ -33,6 +34,16 @@ class TestCOSName
{
private static final File TARGETPDFDIR = new File("target/pdfs");
+ @Test
+ void testNullByteRejection()
+ {
+ // Null bytes should not be allowed in name bytes
+ byte[] bytesWithNull = new byte[] { 'N', 'a', 'm', 'e', 0, 'X' };
+ assertThrows(IllegalArgumentException.class, () -> {
+ COSName.getPDFName(bytesWithNull);
+ }, "getPDFName should reject bytes containing null (0x00)");
+ }
+
/**
* PDFBOX-4076: Check that characters outside of US_ASCII are not replaced with "?".
*
Modified:
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestCOSParser.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestCOSParser.java Fri Mar 20 10:00:05 2026 (r1932402)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestCOSParser.java Fri Mar 20 10:01:30 2026 (r1932403)
@@ -20,7 +20,9 @@ package org.apache.pdfbox.pdfparser;
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.junit.jupiter.api.Test;
@@ -88,4 +90,285 @@ class TestCOSParser
assertEquals(output, cosString.getString());
}
+ // COSName parsing tests based on examples from PDF 32000-1:2008, Table 4, Section 7.3.5
+
+ @Test
+ void testTable4Example_Name1() throws IOException
+ {
+ // /Name1 → "Name1"
+ byte[] inputBytes = "/Name1 ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("Name1", name.getName());
+ }
+
+ @Test
+ void testTable4Example_ASomewhatLongerName() throws IOException
+ {
+ // /ASomewhatLongerName → "ASomewhatLongerName"
+ byte[] inputBytes = "/ASomewhatLongerName ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("ASomewhatLongerName", name.getName());
+ }
+
+ @Test
+ void testTable4Example_WithSpecialCharacters() throws IOException
+ {
+ // /A;Name_With-Various***Characters? → "A;Name_With-Various***Characters?"
+ byte[] inputBytes = "/A;Name_With-Various***Characters? ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("A;Name_With-Various***Characters?", name.getName());
+ }
+
+ @Test
+ void testTable4Example_Numeric() throws IOException
+ {
+ // /1.2 → "1.2"
+ byte[] inputBytes = "/1.2 ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("1.2", name.getName());
+ }
+
+ @Test
+ void testTable4Example_DollarSigns() throws IOException
+ {
+ // /$$ → "$$"
+ byte[] inputBytes = "/$$ ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("$$", name.getName());
+ }
+
+ @Test
+ void testTable4Example_AtPattern() throws IOException
+ {
+ // /@pattern → "@pattern"
+ byte[] inputBytes = "/@pattern ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("@pattern", name.getName());
+ }
+
+ @Test
+ void testTable4Example_DotNotdef() throws IOException
+ {
+ // /#2Enotdef → ".notdef" (#2E = '.', i.e. the spec example /.notdef with the dot hex-encoded)
+ byte[] inputBytes = "/#2Enotdef ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals(".notdef", name.getName());
+ }
+
+ @Test
+ void testTable4Example_HexEncodedSpace() throws IOException
+ {
+ // /lime#20Green → "lime Green"
+ byte[] inputBytes = "/lime#20Green ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("lime Green", name.getName());
+ }
+
+ @Test
+ void testTable4Example_HexEncodedParentheses() throws IOException
+ {
+ // /paired#28#29parentheses → "paired()parentheses"
+ // (#28 = '(', #29 = ')')
+ byte[] inputBytes = "/paired#28#29parentheses ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("paired()parentheses", name.getName());
+ }
+
+ @Test
+ void testTable4Example_HexEncodedNumberSign() throws IOException
+ {
+ // /The_Key_of_F#23_Minor → "The_Key_of_F#_Minor"
+ // (#23 = '#')
+ byte[] inputBytes = "/The_Key_of_F#23_Minor ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("The_Key_of_F#_Minor", name.getName());
+ }
+
+ @Test
+ void testTable4Example_HexEncodedLetter() throws IOException
+ {
+ // /A#42 → "AB" (note #42 = 'B')
+ byte[] inputBytes = "/A#42 ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("AB", name.getName());
+ }
+
+ @Test
+ void testTable4Example_EmptyName() throws IOException
+ {
+ // / → "" (empty name is valid per spec)
+ byte[] inputBytes = "/ ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("", name.getName());
+ }
+
+ @Test
+ void testNullCharacterTermination() throws IOException
+ {
+ // /Name\0Extra should parse as "Name" and stop at null
+ byte[] inputBytes = new byte[] { '/', 'N', 'a', 'm', 'e', 0, 'E', 'x', 't', 'r', 'a', ' ' };
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("Name", name.getName());
+ }
+
+ @Test
+ void testInvalidHexSequence() throws IOException
+ {
+ // /Name#GG should keep "#GG" literally since G is not a valid hex digit
+ byte[] inputBytes = "/Name#GG ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ // When # is not followed by two hex digits, the # and the following chars are kept literally
+ assertEquals("Name#GG", name.getName());
+ }
+
+ @Test
+ void testHexEscapeLowercase() throws IOException
+ {
+ // /Name#2fTest (lowercase hex #2f = '/')
+ byte[] inputBytes = "/Name#2fTest ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("Name/Test", name.getName());
+ }
+
+ @Test
+ void testHexEscapeUppercase() throws IOException
+ {
+ // /Name#2FTest (uppercase hex #2F = '/')
+ byte[] inputBytes = "/Name#2FTest ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("Name/Test", name.getName());
+ }
+
+ @Test
+ void testNameTerminationByDelimiters() throws IOException
+ {
+ // Test termination by '>'
+ byte[] inputBytes = "/Name1>".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("Name1", name.getName());
+
+ // Test termination by '<'
+ inputBytes = "/Name2<".getBytes(StandardCharsets.US_ASCII);
+ buffer = new RandomAccessReadBuffer(inputBytes);
+ cosParser = new COSParser(buffer);
+ name = cosParser.parseCOSName();
+ assertEquals("Name2", name.getName());
+
+ // Test termination by '['
+ inputBytes = "/Name3[".getBytes(StandardCharsets.US_ASCII);
+ buffer = new RandomAccessReadBuffer(inputBytes);
+ cosParser = new COSParser(buffer);
+ name = cosParser.parseCOSName();
+ assertEquals("Name3", name.getName());
+
+ // Test termination by ']'
+ inputBytes = "/Name4]".getBytes(StandardCharsets.US_ASCII);
+ buffer = new RandomAccessReadBuffer(inputBytes);
+ cosParser = new COSParser(buffer);
+ name = cosParser.parseCOSName();
+ assertEquals("Name4", name.getName());
+
+ // Test termination by '('
+ inputBytes = "/Name5(".getBytes(StandardCharsets.US_ASCII);
+ buffer = new RandomAccessReadBuffer(inputBytes);
+ cosParser = new COSParser(buffer);
+ name = cosParser.parseCOSName();
+ assertEquals("Name5", name.getName());
+
+ // Test termination by ')'
+ inputBytes = "/Name6)".getBytes(StandardCharsets.US_ASCII);
+ buffer = new RandomAccessReadBuffer(inputBytes);
+ cosParser = new COSParser(buffer);
+ name = cosParser.parseCOSName();
+ assertEquals("Name6", name.getName());
+
+ // Test termination by '/'
+ inputBytes = "/Name7/".getBytes(StandardCharsets.US_ASCII);
+ buffer = new RandomAccessReadBuffer(inputBytes);
+ cosParser = new COSParser(buffer);
+ name = cosParser.parseCOSName();
+ assertEquals("Name7", name.getName());
+
+ // Test termination by '%'
+ inputBytes = "/Name8%".getBytes(StandardCharsets.US_ASCII);
+ buffer = new RandomAccessReadBuffer(inputBytes);
+ cosParser = new COSParser(buffer);
+ name = cosParser.parseCOSName();
+ assertEquals("Name8", name.getName());
+ }
+
+ @Test
+ void testASCIIRegularCharacters() throws IOException
+ {
+ // Test a range of ASCII characters that are not delimiters
+ // PDF delimiters that terminate name parsing: whitespace, <, >, [, ], {, }, /, %, (, )
+ byte[] inputBytes = "/!\"$'*+-._:;=@~^`|\\".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ // All these non-delimiter characters should be preserved
+ assertEquals("!\"$'*+-._:;=@~^`|\\", name.getName());
+ }
+
+ @Test
+ void testUTF8InNames()
+ {
+ // Create a name with UTF-8 encoded characters
+ String nameStr = "Test中国";
+ byte[] nameBytes = nameStr.getBytes(StandardCharsets.UTF_8);
+ COSName name = COSName.getPDFName(nameBytes);
+
+ // The name should preserve the UTF-8 bytes
+ byte[] retrievedBytes = name.getBytes();
+ // Verify by recreating the string
+ String retrievedStr = new String(retrievedBytes, StandardCharsets.UTF_8);
+ assertEquals(nameStr, retrievedStr);
+ }
+
+ @Test
+ void testNameCanonicaliation()
+ {
+ byte[] bytes1 = "TestName".getBytes(StandardCharsets.US_ASCII);
+ byte[] bytes2 = "TestName".getBytes(StandardCharsets.US_ASCII);
+
+ COSName name1 = COSName.getPDFName(bytes1);
+ COSName name2 = COSName.getPDFName(bytes2);
+
+ // Same bytes should return the same canonical COSName (NOTE(review): assertEquals only checks equals(); assertSame would verify reference identity)
+ assertEquals(name1, name2);
+ }
}