Author: msahyoun
Date: Fri Mar 20 10:01:30 2026
New Revision: 1932403

Log:
PDFBOX-6178, PDFBOX-4076: add tests; partially created with Claude Haiku 4.5

Modified:
   pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java
   
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestCOSParser.java

Modified: 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java    
Fri Mar 20 10:00:05 2026        (r1932402)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java    
Fri Mar 20 10:01:30 2026        (r1932403)
@@ -17,6 +17,7 @@
 package org.apache.pdfbox.cos;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertThrows;
 import static org.junit.jupiter.api.Assertions.assertTrue;
 
 import java.io.ByteArrayOutputStream;
@@ -33,6 +34,16 @@ class TestCOSName
 {
     private static final File TARGETPDFDIR = new File("target/pdfs");
 
+    @Test
+    void testNullByteRejection()
+    {
+        // Null bytes should not be allowed in name bytes
+        byte[] bytesWithNull = new byte[] { 'N', 'a', 'm', 'e', 0, 'X' };
+        assertThrows(IllegalArgumentException.class, () -> {
+            COSName.getPDFName(bytesWithNull);
+        }, "getPDFName should reject bytes containing null (0x00)");
+    }
+
     /**
      * PDFBOX-4076: Check that characters outside of US_ASCII are not replaced 
with "?".
      * 

Modified: 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestCOSParser.java
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestCOSParser.java
    Fri Mar 20 10:00:05 2026        (r1932402)
+++ 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestCOSParser.java
    Fri Mar 20 10:01:30 2026        (r1932403)
@@ -20,7 +20,9 @@ package org.apache.pdfbox.pdfparser;
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
 import java.io.IOException;
+import java.nio.charset.StandardCharsets;
 
+import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSString;
 import org.apache.pdfbox.io.RandomAccessReadBuffer;
 import org.junit.jupiter.api.Test;
@@ -88,4 +90,285 @@ class TestCOSParser
         assertEquals(output, cosString.getString());
     }
 
+    // COSName parsing tests based on examples from PDF 32000-1:2008, Table 4, 
Section 7.3.5
+
+    @Test
+    void testTable4Example_Name1() throws IOException
+    {
+        // /Name1 → "Name1"
+        byte[] inputBytes = "/Name1 ".getBytes(StandardCharsets.US_ASCII);
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSName name = cosParser.parseCOSName();
+        assertEquals("Name1", name.getName());
+    }
+
+    @Test
+    void testTable4Example_ASomewhatLongerName() throws IOException
+    {
+        // /ASomewhatLongerName → "ASomewhatLongerName"
+        byte[] inputBytes = "/ASomewhatLongerName 
".getBytes(StandardCharsets.US_ASCII);
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSName name = cosParser.parseCOSName();
+        assertEquals("ASomewhatLongerName", name.getName());
+    }
+
+    @Test
+    void testTable4Example_WithSpecialCharacters() throws IOException
+    {
+        // /A;Name_With-Various***Characters? → 
"A;Name_With-Various***Characters?"
+        byte[] inputBytes = "/A;Name_With-Various***Characters? 
".getBytes(StandardCharsets.US_ASCII);
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSName name = cosParser.parseCOSName();
+        assertEquals("A;Name_With-Various***Characters?", name.getName());
+    }
+
+    @Test
+    void testTable4Example_Numeric() throws IOException
+    {
+        // /1.2 → "1.2"
+        byte[] inputBytes = "/1.2 ".getBytes(StandardCharsets.US_ASCII);
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSName name = cosParser.parseCOSName();
+        assertEquals("1.2", name.getName());
+    }
+
+    @Test
+    void testTable4Example_DollarSigns() throws IOException
+    {
+        // /$$ → "$$"
+        byte[] inputBytes = "/$$ ".getBytes(StandardCharsets.US_ASCII);
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSName name = cosParser.parseCOSName();
+        assertEquals("$$", name.getName());
+    }
+
+    @Test
+    void testTable4Example_AtPattern() throws IOException
+    {
+        // /@pattern → "@pattern"
+        byte[] inputBytes = "/@pattern ".getBytes(StandardCharsets.US_ASCII);
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSName name = cosParser.parseCOSName();
+        assertEquals("@pattern", name.getName());
+    }
+
+    @Test
+    void testTable4Example_DotNotdef() throws IOException
+    {
+        // /#2Enotdef → ".notdef" ('.' is 0x2E, hex-encoded here as #2E)
+        byte[] inputBytes = "/#2Enotdef ".getBytes(StandardCharsets.US_ASCII);
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSName name = cosParser.parseCOSName();
+        assertEquals(".notdef", name.getName());
+    }
+
+    @Test
+    void testTable4Example_HexEncodedSpace() throws IOException
+    {
+        // /lime#20Green → "lime Green"
+        byte[] inputBytes = "/lime#20Green 
".getBytes(StandardCharsets.US_ASCII);
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSName name = cosParser.parseCOSName();
+        assertEquals("lime Green", name.getName());
+    }
+
+    @Test
+    void testTable4Example_HexEncodedParentheses() throws IOException
+    {
+        // /paired#28#29parentheses → "paired()parentheses"
+        // (#28 = '(', #29 = ')')
+        byte[] inputBytes = "/paired#28#29parentheses 
".getBytes(StandardCharsets.US_ASCII);
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSName name = cosParser.parseCOSName();
+        assertEquals("paired()parentheses", name.getName());
+    }
+
+    @Test
+    void testTable4Example_HexEncodedNumberSign() throws IOException
+    {
+        // /The_Key_of_F#23_Minor → "The_Key_of_F#_Minor"
+        // (#23 = '#')
+        byte[] inputBytes = "/The_Key_of_F#23_Minor 
".getBytes(StandardCharsets.US_ASCII);
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSName name = cosParser.parseCOSName();
+        assertEquals("The_Key_of_F#_Minor", name.getName());
+    }
+
+    @Test
+    void testTable4Example_HexEncodedLetter() throws IOException
+    {
+        // /A#42 → "AB" (note #42 = 'B')
+        byte[] inputBytes = "/A#42 ".getBytes(StandardCharsets.US_ASCII);
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSName name = cosParser.parseCOSName();
+        assertEquals("AB", name.getName());
+    }
+
+    @Test
+    void testTable4Example_EmptyName() throws IOException
+    {
+        // / → "" (empty name is valid per spec)
+        byte[] inputBytes = "/ ".getBytes(StandardCharsets.US_ASCII);
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSName name = cosParser.parseCOSName();
+        assertEquals("", name.getName());
+    }
+
+    @Test
+    void testNullCharacterTermination() throws IOException
+    {
+        // /Name\0Extra should parse as "Name" and stop at null
+        byte[] inputBytes = new byte[] { '/', 'N', 'a', 'm', 'e', 0, 'E', 'x', 
't', 'r', 'a', ' ' };
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSName name = cosParser.parseCOSName();
+        assertEquals("Name", name.getName());
+    }
+
+    @Test
+    void testInvalidHexSequence() throws IOException
+    {
+        // /Name#GG should keep "#GG" literally since G is not a valid hex digit
+        byte[] inputBytes = "/Name#GG ".getBytes(StandardCharsets.US_ASCII);
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSName name = cosParser.parseCOSName();
+        // When # is not followed by two hex digits, the sequence is kept 
literally
+        assertEquals("Name#GG", name.getName());
+    }
+
+    @Test
+    void testHexEscapeLowercase() throws IOException
+    {
+        // /Name#2fTest (lowercase hex #2f = '/')
+        byte[] inputBytes = "/Name#2fTest 
".getBytes(StandardCharsets.US_ASCII);
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSName name = cosParser.parseCOSName();
+        assertEquals("Name/Test", name.getName());
+    }
+
+    @Test
+    void testHexEscapeUppercase() throws IOException
+    {
+        // /Name#2FTest (uppercase hex #2F = '/')
+        byte[] inputBytes = "/Name#2FTest 
".getBytes(StandardCharsets.US_ASCII);
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSName name = cosParser.parseCOSName();
+        assertEquals("Name/Test", name.getName());
+    }
+
+    @Test
+    void testNameTerminationByDelimiters() throws IOException
+    {
+        // Test termination by '>'
+        byte[] inputBytes = "/Name1>".getBytes(StandardCharsets.US_ASCII);
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSName name = cosParser.parseCOSName();
+        assertEquals("Name1", name.getName());
+
+        // Test termination by '<'
+        inputBytes = "/Name2<".getBytes(StandardCharsets.US_ASCII);
+        buffer = new RandomAccessReadBuffer(inputBytes);
+        cosParser = new COSParser(buffer);
+        name = cosParser.parseCOSName();
+        assertEquals("Name2", name.getName());
+
+        // Test termination by '['
+        inputBytes = "/Name3[".getBytes(StandardCharsets.US_ASCII);
+        buffer = new RandomAccessReadBuffer(inputBytes);
+        cosParser = new COSParser(buffer);
+        name = cosParser.parseCOSName();
+        assertEquals("Name3", name.getName());
+
+        // Test termination by ']'
+        inputBytes = "/Name4]".getBytes(StandardCharsets.US_ASCII);
+        buffer = new RandomAccessReadBuffer(inputBytes);
+        cosParser = new COSParser(buffer);
+        name = cosParser.parseCOSName();
+        assertEquals("Name4", name.getName());
+
+        // Test termination by '('
+        inputBytes = "/Name5(".getBytes(StandardCharsets.US_ASCII);
+        buffer = new RandomAccessReadBuffer(inputBytes);
+        cosParser = new COSParser(buffer);
+        name = cosParser.parseCOSName();
+        assertEquals("Name5", name.getName());
+
+        // Test termination by ')'
+        inputBytes = "/Name6)".getBytes(StandardCharsets.US_ASCII);
+        buffer = new RandomAccessReadBuffer(inputBytes);
+        cosParser = new COSParser(buffer);
+        name = cosParser.parseCOSName();
+        assertEquals("Name6", name.getName());
+
+        // Test termination by '/'
+        inputBytes = "/Name7/".getBytes(StandardCharsets.US_ASCII);
+        buffer = new RandomAccessReadBuffer(inputBytes);
+        cosParser = new COSParser(buffer);
+        name = cosParser.parseCOSName();
+        assertEquals("Name7", name.getName());
+
+        // Test termination by '%'
+        inputBytes = "/Name8%".getBytes(StandardCharsets.US_ASCII);
+        buffer = new RandomAccessReadBuffer(inputBytes);
+        cosParser = new COSParser(buffer);
+        name = cosParser.parseCOSName();
+        assertEquals("Name8", name.getName());
+    }
+
+    @Test
+    void testASCIIRegularCharacters() throws IOException
+    {
+        // Test a range of ASCII characters that are not delimiters
+        // PDF delimiters that terminate name parsing: whitespace, <, >, [, ], 
{, }, /, %, (, )
+        byte[] inputBytes = 
"/!\"$'*+-._:;=@~^`|\\".getBytes(StandardCharsets.US_ASCII);
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSName name = cosParser.parseCOSName();
+        // All these non-delimiter characters should be preserved
+        assertEquals("!\"$'*+-._:;=@~^`|\\", name.getName());
+    }
+
+    @Test
+    void testUTF8InNames()
+    {
+        // Create a name with UTF-8 encoded characters
+        String nameStr = "Test中国";
+        byte[] nameBytes = nameStr.getBytes(StandardCharsets.UTF_8);
+        COSName name = COSName.getPDFName(nameBytes);
+        
+        // The name should preserve the UTF-8 bytes
+        byte[] retrievedBytes = name.getBytes();
+        // Verify by recreating the string
+        String retrievedStr = new String(retrievedBytes, 
StandardCharsets.UTF_8);
+        assertEquals(nameStr, retrievedStr);
+    }
+
+    @Test
+    void testNameCanonicaliation()
+    {
+        byte[] bytes1 = "TestName".getBytes(StandardCharsets.US_ASCII);
+        byte[] bytes2 = "TestName".getBytes(StandardCharsets.US_ASCII);
+        
+        COSName name1 = COSName.getPDFName(bytes1);
+        COSName name2 = COSName.getPDFName(bytes2);
+        
+        // Equal bytes must yield equal names; NOTE(review): use assertSame if identity is meant
+        assertEquals(name1, name2);
+    }
 }

Reply via email to