Author: msahyoun
Date: Fri Mar 20 14:58:50 2026
New Revision: 1932416
Log:
PDFBOX-6178, PDFBOX-4076: use byte[] instead of String for internal storage in
COSName; backport from trunk
Modified:
pdfbox/branches/3.0/pdfbox/pom.xml
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java
pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java
Modified: pdfbox/branches/3.0/pdfbox/pom.xml
==============================================================================
--- pdfbox/branches/3.0/pdfbox/pom.xml Fri Mar 20 14:54:14 2026
(r1932415)
+++ pdfbox/branches/3.0/pdfbox/pom.xml Fri Mar 20 14:58:50 2026
(r1932416)
@@ -901,10 +901,35 @@
<sha512>60e7b46f11a655083a57f4a627edf75bee477cd1ebfb06fbaefeb8ba16a01be37039cfd51c7c181a3ea368b53d22e327d480a7b0699c0edb2e06b4faaae7790f</sha512>
</configuration>
</execution>
+ <execution>
+ <id>PDFBOX-6178</id>
+ <phase>generate-test-resources</phase>
+ <goals>
+ <goal>wget</goal>
+ </goals>
+ <configuration>
+
<url>https://issues.apache.org/jira/secure/attachment/13081297/form_empty.pdf</url>
+
<outputDirectory>${project.build.directory}/pdfs</outputDirectory>
+ <outputFileName>PDFBOX-6178.pdf</outputFileName>
+
<sha512>d39486af0614bd099167a6adaab833aed41a0ebec7b85b13b382a2fdb6fddbcaaea9ab26ed0a81b72822258c8dd66dd535fa5c76afd1e5a8b1bff7d81e890274</sha512>
+ </configuration>
+ </execution>
+ <execution>
+ <id>PDFBOX-6178-1</id>
+ <phase>generate-test-resources</phase>
+ <goals>
+ <goal>wget</goal>
+ </goals>
+ <configuration>
+
<url>https://issues.apache.org/jira/secure/attachment/13081311/form_selected_ASCII_NUL_acrobat.pdf</url>
+
<outputDirectory>${project.build.directory}/pdfs</outputDirectory>
+ <outputFileName>PDFBOX-6178-1.pdf</outputFileName>
+
<sha512>83bc557e6f7d3e98de6e81168b2e2fb3def5025cc5fab1ddb3ef658505351c615253587f09b93503bb73a6225f3d3898be894e9d05dba8d464f4dd9c54514bc3</sha512>
+ </configuration>
+ </execution>
</executions>
</plugin>
</plugins>
</build>
</project>
-
Modified:
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java
==============================================================================
--- pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java
Fri Mar 20 14:54:14 2026 (r1932415)
+++ pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java
Fri Mar 20 14:58:50 2026 (r1932416)
@@ -18,7 +18,9 @@ package org.apache.pdfbox.cos;
import java.io.IOException;
import java.io.OutputStream;
+import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
@@ -33,11 +35,11 @@ import org.apache.pdfbox.util.Hex;
public final class COSName extends COSBase implements Comparable<COSName>
{
// using ConcurrentHashMap because this can be accessed by multiple threads
- private static final Map<String, COSName> nameMap = new
ConcurrentHashMap<>(8192);
+ private static final Map<ByteBuffer, COSName> nameMap = new
ConcurrentHashMap<>(8192);
// all common COSName values are stored in this HashMap
// they are already defined as static constants and don't need to be
synchronized
- private static final Map<String, COSName> commonNameMap = new
HashMap<>(768);
+ private static final Map<ByteBuffer, COSName> commonNameMap = new
HashMap<>(768);
//
// IMPORTANT: this list is *alphabetized* and does not need any JavaDoc
@@ -674,7 +676,28 @@ public final class COSName extends COSBa
public static final COSName ZA_DB = new COSName("ZaDb");
// fields
- private final String name;
+
+ /**
+ * <p>Per PDF 32000-1:2008 §7.3.5: Beginning with PDF 1.2 a name object is
an atomic symbol
+ * uniquely defined by a sequence of any characters (8-bit values) except
null
+ * (character code 0).</p>
+ */
+ private final byte[] nameBytes;
+
+ /**
+ * Returns a {@code COSName} whose byte sequence is the UTF-8 encoding of
{@code aName}.
+ *
+ * <p>This is the standard factory for names defined in Java source code
(e.g. the static
+ * constants above). All well-formed PDF names defined by the spec are
ASCII, so the UTF-8
+ * encoding is a transparent identity transform for those cases.</p>
+ *
+ * @param aName the name string; must not be {@code null}
+ * @return a canonicalised {@code COSName} instance
+ */
+ public static COSName getPDFName(String aName)
+ {
+ return getPDFName(aName.getBytes(StandardCharsets.UTF_8));
+ }
/**
* This will get a COSName object with that name.
@@ -683,21 +706,24 @@ public final class COSName extends COSBa
*
* @return A COSName with the specified name.
*/
- public static COSName getPDFName(String aName)
+ public static COSName getPDFName(byte[] bytes)
{
COSName name = null;
- if (aName != null)
+ if (bytes != null)
{
+ // Wrap for lookup only to avoid unnecessary copying of the byte
array for the key.
+ ByteBuffer lookupKey = ByteBuffer.wrap(bytes);
+
// Is it a common COSName ??
- name = commonNameMap.get(aName);
+ name = commonNameMap.get(lookupKey);
if (name == null)
{
// It seems to be a document specific COSName
- name = nameMap.get(aName);
+ name = nameMap.get(lookupKey);
if (name == null)
{
// name is added to the synchronized map in the constructor
- name = new COSName(aName, false);
+ name = new COSName(bytes, false);
}
}
}
@@ -707,33 +733,64 @@ public final class COSName extends COSBa
/**
* Private constructor. This will limit the number of COSName objects.
that are created.
*
- * @param aName The name of the COSName object.
+ * @param bytes The raw byte sequence that defines this name.
* @param staticValue Indicates if the COSName object is static so that it
can be stored in the HashMap without
* synchronizing.
*/
- private COSName(String aName, boolean staticValue)
+ private COSName(byte[] bytes, boolean staticValue)
{
- name = aName;
+ // Defensive copy which is OK to share as the key and the nameBytes
+ // of the COSName are immutable.
+ byte[] storedBytes = Arrays.copyOf(bytes, bytes.length);
+ ByteBuffer storedKey = ByteBuffer.wrap(storedBytes);
+
+ this.nameBytes = storedBytes;
+
if (staticValue)
{
- commonNameMap.put(aName, this);
+ commonNameMap.put(storedKey, this);
}
else
{
- nameMap.put(aName, this);
+ nameMap.put(storedKey, this);
}
}
/**
* Private constructor. This will limit the number of COSName objects.
that are created.
*
- * @param aName The name of the COSName object.
+ * @param storedBytes The raw byte sequence that defines this name.
+ */
+ private COSName(byte[] storedBytes)
+ {
+ this(storedBytes, false);
+ }
+
+ /**
+ * Private constructor. This will limit the number of COSName objects
that are created.
+ *
+ * @param aName The name of the COSName object; it is stored as its UTF-8 byte sequence.
*/
private COSName(String aName)
{
- this(aName, true);
+ this(aName.getBytes(StandardCharsets.UTF_8), true);
+ }
+
+ /**
+ * Returns the raw byte sequence that defines this name.
+ *
+ * <p>This is the atomic content/identity of the name. Prefer this over
+ * {@link #getName()} whenever you need to write name bytes to an output
stream, compare names
+ * parsed from a PDF, or otherwise operate at the byte level.</p>
+ *
+ * @return a defensive copy of the internal byte array; never {@code null}
+ */
+ public byte[] getBytes()
+ {
+ return Arrays.copyOf(nameBytes, nameBytes.length);
}
+
/**
* This will get the name of this COSName object.
*
@@ -741,31 +798,70 @@ public final class COSName extends COSBa
*/
public String getName()
{
- return name;
+ String utf8String = new String(nameBytes, StandardCharsets.UTF_8);
+
+ //check for lossy decoding, which can happen if the name contains
+ // bytes that are not valid UTF-8
+ if (utf8String.indexOf('\uFFFD') >= 0) {
+ // fall back to ISO-8859-1, which is a single-byte encoding that
can decode any
+ // byte sequence without loss
+ return new String(nameBytes, StandardCharsets.ISO_8859_1);
+ }
+ return utf8String;
}
@Override
public String toString()
{
- return "COSName{" + name + "}";
+ return "COSName{" + getName() + "}";
}
@Override
public boolean equals(Object object)
{
- return object instanceof COSName && name.equals(((COSName)
object).name);
+ return object instanceof COSName && Arrays.equals(nameBytes,
((COSName) object).nameBytes);
}
@Override
public int hashCode()
{
- return name.hashCode();
+ return Arrays.hashCode(nameBytes);
}
+ /**
+ * Lexicographic ordering over unsigned byte values.
+ *
+ * <p>Unsigned comparison is used so that bytes with the high bit set
+ * sort after all ASCII bytes, which matches the natural PDF byte
ordering.</p>
+ */
@Override
public int compareTo(COSName other)
{
- return name.compareTo(other.name);
+ if (other == null)
+ {
+ return 1;
+ }
+
+ if (nameBytes == other.nameBytes)
+ {
+ return 0;
+ }
+
+ if (nameBytes == null || other.nameBytes == null)
+ {
+ return nameBytes == null ? -1 : 1;
+ }
+
+ int len = Math.min(nameBytes.length, other.nameBytes.length);
+ for (int i = 0; i < len; i++)
+ {
+ int diff = Byte.toUnsignedInt(nameBytes[i]) -
Byte.toUnsignedInt(other.nameBytes[i]);
+ if (diff != 0)
+ {
+ return diff;
+ }
+ }
+ return nameBytes.length - other.nameBytes.length;
}
/**
@@ -774,7 +870,7 @@ public final class COSName extends COSBa
*/
public boolean isEmpty()
{
- return name.isEmpty();
+ return nameBytes.length == 0;
}
@Override
@@ -792,7 +888,7 @@ public final class COSName extends COSBa
public void writePDF(OutputStream output) throws IOException
{
output.write('/');
- byte[] bytes = getName().getBytes(StandardCharsets.UTF_8);
+ byte[] bytes = getBytes();
for (byte b : bytes)
{
int current = b & 0xFF;
Modified:
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
==============================================================================
---
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
Fri Mar 20 14:54:14 2026 (r1932415)
+++
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
Fri Mar 20 14:58:50 2026 (r1932416)
@@ -935,7 +935,7 @@ public abstract class BaseParser
source.rewind(1);
}
- return COSName.getPDFName(decodeBuffer(buffer));
+ return COSName.getPDFName(buffer.toByteArray());
}
/**
Modified:
pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java
==============================================================================
---
pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java
Fri Mar 20 14:54:14 2026 (r1932415)
+++
pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/cos/TestCOSName.java
Fri Mar 20 14:58:50 2026 (r1932416)
@@ -20,15 +20,19 @@ import static org.junit.jupiter.api.Asse
import static org.junit.jupiter.api.Assertions.assertTrue;
import java.io.ByteArrayOutputStream;
+import java.io.File;
import java.io.IOException;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
+import org.apache.pdfbox.pdmodel.interactive.form.PDField;
import org.junit.jupiter.api.Test;
class TestCOSName
{
+ private static final File TARGETPDFDIR = new File("target/pdfs");
+
/**
* PDFBOX-4076: Check that characters outside of US_ASCII are not replaced
with "?".
*
@@ -56,4 +60,65 @@ class TestCOSName
}
}
+ /**
+ * PDFBOX-6178: Ensure that names with escape sequences #xx are written as
is.
+ *
+ * @throws IOException
+ */
+ @Test
+ void PDFBox6178() throws IOException
+ {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+
+ try (PDDocument document = Loader.loadPDF(new
File(TARGETPDFDIR,"PDFBOX-6178.pdf"))) {
+ PDField field = document.getDocumentCatalog()
+ .getAcroForm(null)
+ .getField("Geschlecht");
+
+ field.setValue("männlich");
+
+ field.getWidgets()
+ .get(0).getAppearance().getNormalAppearance().getCOSObject()
+ .keySet().forEach(k -> {
+ try {
+ k.writePDF(baos);
+ } catch (IOException e) {
+ // ignored
+ }
+ });
+
+ String writtenKeys = new String(baos.toByteArray(), "UTF-8");
+ assertTrue(writtenKeys.contains("/m#E4nnlich"), "Output should be
/m#e4nnlich (with 0xE4 as hex escape)");
+ }
+ }
+
+ /**
+ * PDFBOX-6178: Ensure that a name containing the escape sequence #00 (ASCII NUL)
is written as is.
+ *
+ * @throws IOException
+ */
+ @Test
+ void NameWithASCII_NUL() throws IOException
+ {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+
+ try (PDDocument document = Loader.loadPDF(new
File(TARGETPDFDIR,"PDFBOX-6178-1.pdf"))) {
+ PDField field = document.getDocumentCatalog()
+ .getAcroForm(null)
+ .getField("Geschlecht");
+
+ field.getWidgets()
+ .get(0).getAppearance().getNormalAppearance().getCOSObject()
+ .keySet().forEach(k -> {
+ try {
+ k.writePDF(baos);
+ } catch (IOException e) {
+ // ignored
+ }
+ });
+
+ String writtenKeys = new String(baos.toByteArray(), "UTF-8");
+ assertTrue(writtenKeys.contains("/m#00nnlich"), "Output should be
/m#00nnlich (with 0xE4 as hex escape)");
+ }
+ }
}
Modified:
pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java
==============================================================================
---
pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java
Fri Mar 20 14:54:14 2026 (r1932415)
+++
pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java
Fri Mar 20 14:58:50 2026 (r1932416)
@@ -22,8 +22,10 @@ import static org.junit.jupiter.api.Asse
import java.io.IOException;
import java.io.InputStream;
+import java.nio.charset.StandardCharsets;
import org.apache.pdfbox.Loader;
+import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSString;
import org.apache.pdfbox.io.RandomAccessReadBuffer;
import org.junit.jupiter.api.Test;
@@ -109,4 +111,285 @@ class TestBaseParser
}
}
+ // COSName parsing tests based on examples from PDF 32000-1:2008, Table 4,
Section 7.3.5
+
+ @Test
+ void testTable4Example_Name1() throws IOException
+ {
+ // /Name1 → "Name1"
+ byte[] inputBytes = "/Name1 ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("Name1", name.getName());
+ }
+
+ @Test
+ void testTable4Example_ASomewhatLongerName() throws IOException
+ {
+ // /ASomewhatLongerName → "ASomewhatLongerName"
+ byte[] inputBytes = "/ASomewhatLongerName
".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("ASomewhatLongerName", name.getName());
+ }
+
+ @Test
+ void testTable4Example_WithSpecialCharacters() throws IOException
+ {
+ // /A;Name_With-Various***Characters? →
"A;Name_With-Various***Characters?"
+ byte[] inputBytes = "/A;Name_With-Various***Characters?
".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("A;Name_With-Various***Characters?", name.getName());
+ }
+
+ @Test
+ void testTable4Example_Numeric() throws IOException
+ {
+ // /1.2 → "1.2"
+ byte[] inputBytes = "/1.2 ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("1.2", name.getName());
+ }
+
+ @Test
+ void testTable4Example_DollarSigns() throws IOException
+ {
+ // /$$ → "$$"
+ byte[] inputBytes = "/$$ ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("$$", name.getName());
+ }
+
+ @Test
+ void testTable4Example_AtPattern() throws IOException
+ {
+ // /@pattern → "@pattern"
+ byte[] inputBytes = "/@pattern ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("@pattern", name.getName());
+ }
+
+ @Test
+ void testTable4Example_DotNotdef() throws IOException
+ {
+ // /#2Enotdef → ".notdef" ('.' is 0x2E, hex-encoded as #2E)
+ byte[] inputBytes = "/#2Enotdef ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals(".notdef", name.getName());
+ }
+
+ @Test
+ void testTable4Example_HexEncodedSpace() throws IOException
+ {
+ // /lime#20Green → "lime Green"
+ byte[] inputBytes = "/lime#20Green
".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("lime Green", name.getName());
+ }
+
+ @Test
+ void testTable4Example_HexEncodedParentheses() throws IOException
+ {
+ // /paired#28#29parentheses → "paired()parentheses"
+ // (#28 = '(', #29 = ')')
+ byte[] inputBytes = "/paired#28#29parentheses
".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("paired()parentheses", name.getName());
+ }
+
+ @Test
+ void testTable4Example_HexEncodedNumberSign() throws IOException
+ {
+ // /The_Key_of_F#23_Minor → "The_Key_of_F#_Minor"
+ // (#23 = '#')
+ byte[] inputBytes = "/The_Key_of_F#23_Minor
".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("The_Key_of_F#_Minor", name.getName());
+ }
+
+ @Test
+ void testTable4Example_HexEncodedLetter() throws IOException
+ {
+ // /A#42 → "AB" (note #42 = 'B')
+ byte[] inputBytes = "/A#42 ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("AB", name.getName());
+ }
+
+ @Test
+ void testTable4Example_EmptyName() throws IOException
+ {
+ // / → "" (empty name is valid per spec)
+ byte[] inputBytes = "/ ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("", name.getName());
+ }
+
+ @Test
+ void testNullCharacterTermination() throws IOException
+ {
+ // /Name\0Extra should parse as "Name" and stop at null
+ byte[] inputBytes = new byte[] { '/', 'N', 'a', 'm', 'e', 0, 'E', 'x',
't', 'r', 'a', ' ' };
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("Name", name.getName());
+ }
+
+ @Test
+ void testInvalidHexSequence() throws IOException
+ {
+ // /Name#GG should keep #GG literally since G is not a valid hex digit
+ byte[] inputBytes = "/Name#GG ".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ // When # is not followed by two hex digits, both chars are kept
literally
+ assertEquals("Name#GG", name.getName());
+ }
+
+ @Test
+ void testHexEscapeLowercase() throws IOException
+ {
+ // /Name#2fTest (lowercase hex #2f = '/')
+ byte[] inputBytes = "/Name#2fTest
".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("Name/Test", name.getName());
+ }
+
+ @Test
+ void testHexEscapeUppercase() throws IOException
+ {
+ // /Name#2FTest (uppercase hex #2F = '/')
+ byte[] inputBytes = "/Name#2FTest
".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("Name/Test", name.getName());
+ }
+
+ @Test
+ void testNameTerminationByDelimiters() throws IOException
+ {
+ // Test termination by '>'
+ byte[] inputBytes = "/Name1>".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ assertEquals("Name1", name.getName());
+
+ // Test termination by '<'
+ inputBytes = "/Name2<".getBytes(StandardCharsets.US_ASCII);
+ buffer = new RandomAccessReadBuffer(inputBytes);
+ cosParser = new COSParser(buffer);
+ name = cosParser.parseCOSName();
+ assertEquals("Name2", name.getName());
+
+ // Test termination by '['
+ inputBytes = "/Name3[".getBytes(StandardCharsets.US_ASCII);
+ buffer = new RandomAccessReadBuffer(inputBytes);
+ cosParser = new COSParser(buffer);
+ name = cosParser.parseCOSName();
+ assertEquals("Name3", name.getName());
+
+ // Test termination by ']'
+ inputBytes = "/Name4]".getBytes(StandardCharsets.US_ASCII);
+ buffer = new RandomAccessReadBuffer(inputBytes);
+ cosParser = new COSParser(buffer);
+ name = cosParser.parseCOSName();
+ assertEquals("Name4", name.getName());
+
+ // Test termination by '('
+ inputBytes = "/Name5(".getBytes(StandardCharsets.US_ASCII);
+ buffer = new RandomAccessReadBuffer(inputBytes);
+ cosParser = new COSParser(buffer);
+ name = cosParser.parseCOSName();
+ assertEquals("Name5", name.getName());
+
+ // Test termination by ')'
+ inputBytes = "/Name6)".getBytes(StandardCharsets.US_ASCII);
+ buffer = new RandomAccessReadBuffer(inputBytes);
+ cosParser = new COSParser(buffer);
+ name = cosParser.parseCOSName();
+ assertEquals("Name6", name.getName());
+
+ // Test termination by '/'
+ inputBytes = "/Name7/".getBytes(StandardCharsets.US_ASCII);
+ buffer = new RandomAccessReadBuffer(inputBytes);
+ cosParser = new COSParser(buffer);
+ name = cosParser.parseCOSName();
+ assertEquals("Name7", name.getName());
+
+ // Test termination by '%'
+ inputBytes = "/Name8%".getBytes(StandardCharsets.US_ASCII);
+ buffer = new RandomAccessReadBuffer(inputBytes);
+ cosParser = new COSParser(buffer);
+ name = cosParser.parseCOSName();
+ assertEquals("Name8", name.getName());
+ }
+
+ @Test
+ void testASCIIRegularCharacters() throws IOException
+ {
+ // Test a range of ASCII characters that are not delimiters
+ // PDF delimiters that terminate name parsing: whitespace, <, >, [, ],
{, }, /, %, (, )
+ byte[] inputBytes =
"/!\"$'*+-._:;=@~^`|\\".getBytes(StandardCharsets.US_ASCII);
+ RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+ COSParser cosParser = new COSParser(buffer);
+ COSName name = cosParser.parseCOSName();
+ // All these non-delimiter characters should be preserved
+ assertEquals("!\"$'*+-._:;=@~^`|\\", name.getName());
+ }
+
+ @Test
+ void testUTF8InNames()
+ {
+ // Create a name with UTF-8 encoded characters
+ String nameStr = "Test中国";
+ byte[] nameBytes = nameStr.getBytes(StandardCharsets.UTF_8);
+ COSName name = COSName.getPDFName(nameBytes);
+
+ // The name should preserve the UTF-8 bytes
+ byte[] retrievedBytes = name.getBytes();
+ // Verify by recreating the string
+ String retrievedStr = new String(retrievedBytes,
StandardCharsets.UTF_8);
+ assertEquals(nameStr, retrievedStr);
+ }
+
+ @Test
+ void testNameCanonicaliation()
+ {
+ byte[] bytes1 = "TestName".getBytes(StandardCharsets.US_ASCII);
+ byte[] bytes2 = "TestName".getBytes(StandardCharsets.US_ASCII);
+
+ COSName name1 = COSName.getPDFName(bytes1);
+ COSName name2 = COSName.getPDFName(bytes2);
+
+ // Same bytes should return references to identical object
+ assertEquals(name1, name2);
+ }
}