pdfparser

lehmi Fri, 31 Oct 2025 00:17:37 -0700

Author: lehmi
Date: Fri Oct 31 07:16:27 2025
New Revision: 1929433

Log:
PDFBOX-6093: move the parsing code to the class it belongs to


Added:
   pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestCOSParser.java   (contents, props changed)
Modified:
   pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
   pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
   pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java
   pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
   pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefParser.java
   pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java       Fri Oct 31 04:20:03 2025        (r1929432)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java       Fri Oct 31 07:16:27 2025        (r1929433)
@@ -19,31 +19,8 @@ package org.apache.pdfbox.pdfparser;
 import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 
-import java.nio.ByteBuffer;
-import java.nio.charset.CharacterCodingException;
-import java.nio.charset.Charset;
-import java.nio.charset.CharsetDecoder;
-import java.nio.charset.CodingErrorAction;
 import java.nio.charset.StandardCharsets;
 
-import java.util.HashMap;
-import java.util.Map;
-
-import org.apache.logging.log4j.Logger;
-import org.apache.logging.log4j.LogManager;
-
-import org.apache.pdfbox.cos.COSArray;
-import org.apache.pdfbox.cos.COSBase;
-import org.apache.pdfbox.cos.COSBoolean;
-import org.apache.pdfbox.cos.COSDictionary;
-import org.apache.pdfbox.cos.COSDocument;
-import org.apache.pdfbox.cos.COSInteger;
-import org.apache.pdfbox.cos.COSName;
-import org.apache.pdfbox.cos.COSNull;
-import org.apache.pdfbox.cos.COSNumber;
-import org.apache.pdfbox.cos.COSObject;
-import org.apache.pdfbox.cos.COSObjectKey;
-import org.apache.pdfbox.cos.COSString;
 import org.apache.pdfbox.io.RandomAccessRead;
 
 /**
@@ -53,91 +30,8 @@ import org.apache.pdfbox.io.RandomAccess
  */
 public abstract class BaseParser
 {
-    /**
-     * Log instance.
-     */
-    private static final Logger LOG = LogManager.getLogger(BaseParser.class);
-
-    private static final long OBJECT_NUMBER_THRESHOLD = 10000000000L;
-
-    private static final long GENERATION_NUMBER_THRESHOLD = 65535;
-
     private static final int MAX_LENGTH_LONG = 
Long.toString(Long.MAX_VALUE).length();
 
-    private static final Charset ALTERNATIVE_CHARSET;
-    private static final int MAX_RECURSION_DEPTH = 500;
-    private static final String MAX_RECUSRION_MSG = //
-            "Reached maximum recursion depth " + 
Integer.toString(MAX_RECURSION_DEPTH);
-    
-    private int recursionDepth = 0;
-
-    private final Map<Long, COSObjectKey> keyCache = new HashMap<>();
-
-    static
-    {
-        Charset cs;
-        String charsetName = "Windows-1252";
-        try
-        {
-            cs = Charset.forName(charsetName);
-        }
-        catch (IllegalArgumentException | UnsupportedOperationException e)
-        {
-            cs = StandardCharsets.ISO_8859_1;
-            LOG.warn(() -> "Charset is not supported: " + charsetName + ", 
falling back to " +
-                    StandardCharsets.ISO_8859_1.name(), e);
-        }
-        ALTERNATIVE_CHARSET = cs;
-    }
-
-    // CharSetDecoders are not threadsafe so not static
-    private final CharsetDecoder utf8Decoder = 
StandardCharsets.UTF_8.newDecoder()
-            .onMalformedInput(CodingErrorAction.REPORT)
-            .onUnmappableCharacter(CodingErrorAction.REPORT);
-
-    protected static final int E = 'e';
-    protected static final int N = 'n';
-    protected static final int D = 'd';
-
-    protected static final int S = 's';
-    protected static final int T = 't';
-    protected static final int R = 'r';
-    protected static final int A = 'a';
-    protected static final int M = 'm';
-
-    protected static final int O = 'o';
-    protected static final int B = 'b';
-    protected static final int J = 'j';
-
-    /**
-     * This is a string constant that will be used for comparisons.
-     */
-    public static final String DEF = "def";
-    /**
-     * This is a string constant that will be used for comparisons.
-     */
-    protected static final String ENDOBJ_STRING = "endobj";
-    /**
-     * This is a string constant that will be used for comparisons.
-     */
-    protected static final String ENDSTREAM_STRING = "endstream";
-    /**
-     * This is a string constant that will be used for comparisons.
-     */
-    protected static final String STREAM_STRING = "stream";
-    /**
-     * This is a string constant that will be used for comparisons.
-     */
-    private static final char[] TRUE = { 't', 'r', 'u', 'e' };
-    /**
-     * This is a string constant that will be used for comparisons.
-     */
-    private static final char[] FALSE = { 'f', 'a', 'l', 's', 'e' };
-    /**
-     * This is a string constant that will be used for comparisons.
-     */
-    private static final char[] NULL = { 'n', 'u', 'l', 'l' };
-
     /**
      * ASCII code for Null.
      */
@@ -168,11 +62,6 @@ public abstract class BaseParser
     protected final RandomAccessRead source;
 
     /**
-     * This is the document that will be parsed.
-     */
-    protected COSDocument document;
-
-    /**
      * Default constructor.
      */
     BaseParser(RandomAccessRead pdfSource)
@@ -180,244 +69,6 @@ public abstract class BaseParser
         this.source = pdfSource;
     }
 
-    private static boolean isHexDigit(char ch)
-    {
-        return isDigit(ch) ||
-        (ch >= 'a' && ch <= 'f') ||
-        (ch >= 'A' && ch <= 'F');
-    }
-
-    /**
-     * Returns the object key for the given combination of object and 
generation number. The object key from the cross
-     * reference table/stream will be reused if available. Otherwise a newly 
created object will be returned.
-     * 
-     * @param num the given object number
-     * @param gen the given generation number
-     * 
-     * @return the COS object key
-     */
-    protected COSObjectKey getObjectKey(long num, int gen)
-    {
-        if (document == null || document.getXrefTable().isEmpty())
-        {
-            return new COSObjectKey(num, gen);
-        }
-        // use a cache to get the COSObjectKey as iterating over the 
xref-table-map gets slow for big pdfs
-        // in the long run we have to overhaul the object pool or even better 
remove it
-        Map<COSObjectKey, Long> xrefTable = document.getXrefTable();
-        if (xrefTable.size() > keyCache.size())
-        {
-            for (COSObjectKey key : xrefTable.keySet())
-            {
-                keyCache.putIfAbsent(key.getInternalHash(), key);
-            }
-        }
-        long internalHashCode = COSObjectKey.computeInternalHash(num, gen);
-        COSObjectKey foundKey = keyCache.get(internalHashCode);
-        return foundKey != null ? foundKey : new COSObjectKey(num, gen);
-    }
-
-    /**
-     * This will parse a PDF dictionary value.
-     *
-     * @return The parsed Dictionary object.
-     *
-     * @throws IOException If there is an error parsing the dictionary object.
-     */
-    private COSBase parseCOSDictionaryValue() throws IOException
-    {
-        long numOffset = source.getPosition();
-        COSBase value = parseDirObject();
-        skipSpaces();
-        // proceed if the given object is a number and the following is a 
number as well
-        if (!(value instanceof COSNumber) || !isDigit())
-        {
-            return value;
-        }
-        // read the remaining information of the object number
-        long genOffset = source.getPosition();
-        COSBase generationNumber = parseDirObject();
-        skipSpaces();
-        readExpectedChar('R');
-        if (!(value instanceof COSInteger))
-        {
-            LOG.error("expected number, actual={} at offset {}", value, 
numOffset);
-            return COSNull.NULL;
-        }
-        if (!(generationNumber instanceof COSInteger))
-        {
-            LOG.error("expected number, actual={} at offset {}", 
generationNumber, genOffset);
-            return COSNull.NULL;
-        }
-        long objNumber = ((COSInteger) value).longValue();
-        if (objNumber <= 0)
-        {
-            LOG.warn("invalid object number value ={} at offset {}", 
objNumber, numOffset);
-            return COSNull.NULL;
-        }
-        int genNumber = ((COSInteger) generationNumber).intValue();
-        if (genNumber < 0)
-        {
-            LOG.error("invalid generation number value ={} at offset {}", 
genNumber, numOffset);
-            return COSNull.NULL;
-        }
-        // dereference the object
-        return getObjectFromPool(getObjectKey(objNumber, genNumber));
-    }
-
-    private COSBase getObjectFromPool(COSObjectKey key) throws IOException
-    {
-        if (document == null)
-        {
-            throw new IOException("object reference " + key + " at offset " + 
source.getPosition()
-                    + " in content stream");
-        }
-        return document.getObjectFromPool(key);
-    }
-
-    /**
-     * This will parse a PDF dictionary.
-     *
-     * @param isDirect indicates whether the dictionary to be read is a direct 
object
-     * @return The parsed dictionary, never null.
-     *
-     * @throws IOException If there is an error reading the stream.
-     */
-    protected COSDictionary parseCOSDictionary(boolean isDirect) throws 
IOException
-    {
-        try
-        {
-            recursionDepth++;
-            if (recursionDepth > MAX_RECURSION_DEPTH)
-            {
-                throw new IOException(MAX_RECUSRION_MSG);
-            }
-            readExpectedChar('<');
-            readExpectedChar('<');
-            skipSpaces();
-            COSDictionary obj = new COSDictionary();
-            obj.setDirect(isDirect);
-            while (true)
-            {
-                skipSpaces();
-                char c = (char) source.peek();
-                if (c == '>')
-                {
-                    break;
-                }
-                else if (c == '/')
-                {
-                    // something went wrong, most likely the dictionary is 
corrupted
-                    // stop immediately and return everything read so far
-                    if (!parseCOSDictionaryNameValuePair(obj))
-                    {
-                        return obj;
-                    }
-                }
-                else
-                {
-                    // invalid dictionary, we were expecting a /Name, read 
until the end or until we can recover
-                    LOG.warn("Invalid dictionary, found: '{}' but expected: 
'/' at offset {}", c,
-                            source.getPosition());
-                    if (readUntilEndOfCOSDictionary())
-                    {
-                        // we couldn't recover
-                        return obj;
-                    }
-                }
-            }
-            try
-            {
-                readExpectedChar('>');
-                readExpectedChar('>');
-            }
-            catch (IOException exception)
-            {
-                LOG.warn("Invalid dictionary, can't find end of dictionary at 
offset {}",
-                        source.getPosition());
-            }
-            return obj;
-        }
-        finally
-        {
-            recursionDepth--;
-        }
-    }
-
-    /**
-     * Keep reading until the end of the dictionary object or the file has 
been hit, or until a '/'
-     * has been found.
-     *
-     * @return true if the end of the object or the file has been found, false 
if not, i.e. that the
-     * caller can continue to parse the dictionary at the current position.
-     *
-     * @throws IOException if there is a reading error.
-     */
-    private boolean readUntilEndOfCOSDictionary() throws IOException
-    {
-        int c = source.read();
-        while (c != -1 && c != '/' && c != '>')
-        {
-            // in addition to stopping when we find / or >, we also want
-            // to stop when we find endstream or endobj.
-            if (c == E)
-            {
-                c = source.read();
-                if (c == N)
-                {
-                    c = source.read();
-                    if (c == D)
-                    {
-                        c = source.read();
-                        boolean isStream = c == S && source.read() == T && 
source.read() == R
-                                && source.read() == E && source.read() == A && 
source.read() == M;
-                        boolean isObj = !isStream && c == O && source.read() 
== B
-                                && source.read() == J;
-                        if (isStream || isObj)
-                        {
-                            // we're done reading this object!
-                            return true;
-                        }
-                    }
-                }
-            }
-            c = source.read();
-        }
-        if (c == -1)
-        {
-            return true;
-        }
-        source.rewind(1);
-        return false;
-    }
-
-    private boolean parseCOSDictionaryNameValuePair(COSDictionary obj) throws 
IOException
-    {
-        COSName key = parseCOSName();
-        if (key == null || key.getName().isEmpty())
-        {
-            LOG.warn("Empty COSName at offset {}", source.getPosition());
-        }
-        COSBase value = parseCOSDictionaryValue();
-        skipSpaces();
-        if (value == null)
-        {
-            LOG.warn("Bad dictionary declaration at offset {}", 
source.getPosition());
-            return false;
-        }
-        else if (value instanceof COSInteger && !((COSInteger) 
value).isValid())
-        {
-            LOG.warn("Skipped out of range number value at offset {}", 
source.getPosition());
-        }
-        else
-        {
-            // label this item as direct, to avoid signature problems.
-            value.setDirect(true);
-            obj.setItem(key, value);
-        }
-        return true;
-    }
-
     /**
      * Skip the upcoming CRLF or LF which are supposed to follow a stream. Trailing spaces are removed as well.
      * 
@@ -536,27 +187,71 @@ public abstract class BaseParser
     }
 
     /**
-     * This will parse a PDF string.
+     * Determine if a character terminates a PDF name.
      *
-     * @return The parsed PDF string.
+     * @param ch The character
+     * @return true if the character terminates a PDF name, otherwise false.
+     */
+    protected static boolean isEndOfName(int ch)
+    {
+        switch (ch)
+        {
+        case ASCII_SPACE:
+        case ASCII_CR:
+        case ASCII_LF:
+        case ASCII_TAB:
+        case '>':
+        case '<':
+        case '[':
+        case '/':
+        case ']':
+        case ')':
+        case '(':
+        case ASCII_NULL:
+        case '\f':
+        case '%':
+        case -1:
+            return true;
+        default:
+            return false;
+        }
+    }
+
+    /**
+     * This will read the next string from the stream.
+     *
+     * @return The string that was read from the stream, never null.
      *
      * @throws IOException If there is an error reading from the stream.
      */
-    protected COSString parseCOSString() throws IOException
+    protected String readString() throws IOException
     {
-        char nextChar = (char) source.read();
-        if (nextChar == '<')
+        skipSpaces();
+        StringBuilder buffer = new StringBuilder();
+        int c = source.read();
+        while (!isEndOfName(c))
         {
-            return parseCOSHexString();
+            buffer.append( (char)c );
+            c = source.read();
         }
-        else if (nextChar != '(')
+        if (c != -1)
         {
-            throw new IOException( "parseCOSString string should start with 
'(' or '<' and not '" +
-                    nextChar + "' at offset " + source.getPosition());
+            source.rewind(1);
         }
-        
+        return buffer.toString();
+    }
+    
+    /**
+     * This will parse a PDF string.
+     *
+     * @return The parsed PDF string.
+     *
+     * @throws IOException If there is an error reading from the stream.
+     */
+    protected byte[] readLiteralString() throws IOException
+    {
+        readExpectedChar('(');
         ByteArrayOutputStream out = new ByteArrayOutputStream();
-
         // This is the number of braces read
         int braces = 1;
         int c = source.read();
@@ -570,7 +265,7 @@ public abstract class BaseParser
 
                 braces--;
                 braces = checkForEndOfString(braces);
-                if( braces != 0 )
+                if (braces != 0)
                 {
                     out.write(ch);
                 }
@@ -604,509 +299,98 @@ public abstract class BaseParser
                     case ')':
                         // PDFBox 276 /Title (c:\)
                     braces = checkForEndOfString(braces);
-                        if( braces != 0 )
-                        {
-                            out.write(next);
-                        }
-                        else
-                        {
-                            out.write('\\');
-                        }
-                        break;
-                    case '(':
-                    case '\\':
+                    if (braces != 0)
+                    {
                         out.write(next);
-                        break;
-                    case ASCII_LF:
-                    case ASCII_CR:
-                        //this is a break in the line so ignore it and the 
newline and continue
+                    }
+                    else
+                    {
+                        out.write('\\');
+                    }
+                    break;
+                case '(':
+                case '\\':
+                    out.write(next);
+                    break;
+                case ASCII_LF:
+                case ASCII_CR:
+                    // this is a break in the line so ignore it and the newline and continue
+                    c = source.read();
+                    while (isEOL(c) && c != -1)
+                    {
                         c = source.read();
-                        while( isEOL(c) && c != -1)
-                        {
-                            c = source.read();
-                        }
-                        nextc = c;
-                        break;
-                    case '0':
-                    case '1':
-                    case '2':
-                    case '3':
-                    case '4':
-                    case '5':
-                    case '6':
-                    case '7':
-                        StringBuilder octal = new StringBuilder();
-                        octal.append( next );
+                    }
+                    nextc = c;
+                    break;
+                case '0':
+                case '1':
+                case '2':
+                case '3':
+                case '4':
+                case '5':
+                case '6':
+                case '7':
+                    StringBuilder octal = new StringBuilder();
+                    octal.append(next);
+                    c = source.read();
+                    char digit = (char) c;
+                    if (digit >= '0' && digit <= '7')
+                    {
+                        octal.append(digit);
                         c = source.read();
-                        char digit = (char)c;
-                        if( digit >= '0' && digit <= '7' )
+                        digit = (char) c;
+                        if (digit >= '0' && digit <= '7')
                         {
-                            octal.append( digit );
-                            c = source.read();
-                            digit = (char)c;
-                            if( digit >= '0' && digit <= '7' )
-                            {
-                                octal.append( digit );
-                            }
-                            else
-                            {
-                                nextc = c;
-                            }
+                            octal.append(digit);
                         }
                         else
                         {
                             nextc = c;
                         }
-    
-                        int character = 0;
-                        try
-                        {
-                            character = Integer.parseInt( octal.toString(), 8 
);
-                        }
-                        catch( NumberFormatException e )
-                        {
-                            throw new IOException( "Error: Expected octal 
character, actual='" + octal + "'", e );
-                        }
-                        out.write(character);
-                        break;
-                    default:
-                        // dropping the backslash
-                        // see 7.3.4.2 Literal Strings for further information
-                        out.write(next);
-                }
-            }
-            else
-            {
-                out.write(ch);
-            }
-            if (nextc != -2)
-            {
-                c = nextc;
-            }
-            else
-            {
-                c = source.read();
-            }
-        }
-        if (c != -1)
-        {
-            source.rewind(1);
-        }
-        return new COSString(out.toByteArray());
-    }
-
-    /**
-     * This will parse a PDF HEX string with fail fast semantic
-     * meaning that we stop if a not allowed character is found.
-     * This is necessary in order to detect malformed input and
-     * be able to skip to next object start.
-     *
-     * We assume starting '&lt;' was already read.
-     * 
-     * @return The parsed PDF string.
-     *
-     * @throws IOException If there is an error reading from the stream.
-     */
-    private COSString parseCOSHexString() throws IOException
-    {
-        final StringBuilder sBuf = new StringBuilder();
-        while( true )
-        {
-            int c = source.read();
-            if ( isHexDigit((char)c) )
-            {
-                sBuf.append( (char) c );
-            }
-            else if ( c == '>' )
-            {
-                break;
-            }
-            else if ( c < 0 ) 
-            {
-                throw new IOException( "Missing closing bracket for hex 
string. Reached EOS." );
-            }
-            else if ( ( c == ' ' ) || ( c == '\n' ) ||
-                    ( c == '\t' ) || ( c == '\r' ) ||
-                    ( c == '\b' ) || ( c == '\f' ) )
-            {
-                continue;
-            }
-            else
-            {
-                // if invalid chars was found: discard last
-                // hex character if it is not part of a pair
-                if (sBuf.length()%2!=0)
-                {
-                    sBuf.deleteCharAt(sBuf.length()-1);
-                }
-                
-                // read till the closing bracket was found
-                do 
-                {
-                    c = source.read();
-                } 
-                while ( c != '>' && c >= 0 );
-                
-                // might have reached EOF while looking for the closing bracket
-                // this can happen for malformed PDFs only. Make sure that 
there is
-                // no endless loop.
-                if ( c < 0 ) 
-                {
-                    throw new IOException( "Missing closing bracket for hex 
string. Reached EOS." );
-                }
-                
-                // exit loop
-                break;
-            }
-        }
-        return COSString.parseHex(sBuf.toString());
-    }
-   
-    /**
-     * This will parse a PDF array object.
-     *
-     * @return The parsed PDF array.
-     *
-     * @throws IOException If there is an error parsing the stream.
-     */
-    protected COSArray parseCOSArray() throws IOException
-    {
-        try
-        {
-            recursionDepth++;
-            if (recursionDepth > MAX_RECURSION_DEPTH)
-            {
-                throw new IOException(MAX_RECUSRION_MSG);
-            }
-            long startPosition = source.getPosition();
-            readExpectedChar('[');
-            COSArray po = new COSArray();
-            COSBase pbo;
-            skipSpaces();
-            int i;
-            while (((i = source.peek()) > 0) && ((char) i != ']'))
-            {
-                pbo = parseDirObject();
-                if (pbo instanceof COSObject)
-                {
-                    // the current empty COSObject is replaced with the 
correct one
-                    pbo = null;
-                    // We have to check if the expected values are there or 
not PDFBOX-385
-                    if (po.size() > 1 && po.get(po.size() - 1) instanceof 
COSInteger)
-                    {
-                        COSInteger genNumber = (COSInteger) 
po.remove(po.size() - 1);
-                        if (po.size() > 0 && po.get(po.size() - 1) instanceof 
COSInteger)
-                        {
-                            COSInteger number = (COSInteger) 
po.remove(po.size() - 1);
-                            if (number.longValue() >= 0 && 
genNumber.intValue() >= 0)
-                            {
-                                COSObjectKey key = 
getObjectKey(number.longValue(),
-                                        genNumber.intValue());
-                                pbo = getObjectFromPool(key);
-                            }
-                            else
-                            {
-                                LOG.warn("Invalid value(s) for an object key 
{} {}", number.longValue(),
-                                        genNumber.intValue());
-                            }
-                        }
-                    }
-                }
-                // something went wrong
-                if (pbo == null)
-                {
-                    //it could be a bad object in the array which is just 
skipped
-                    LOG.warn("Corrupt array element at offset {}, start 
offset: {}",
-                            source.getPosition(), startPosition);
-                    String isThisTheEnd = readString();
-                    // return immediately if a corrupt element is followed by 
another array
-                    // to avoid a possible infinite recursion as most likely 
the whole array is corrupted
-                    if (isThisTheEnd.isEmpty() && source.peek() == '[')
-                    {
-                        return po;
                     }
-                    
source.rewind(isThisTheEnd.getBytes(StandardCharsets.ISO_8859_1).length);
-                    // This could also be an "endobj" or "endstream" which 
means we can assume that
-                    // the array has ended.
-                    if (ENDOBJ_STRING.equals(isThisTheEnd) || 
ENDSTREAM_STRING.equals(isThisTheEnd))
+                    else
                     {
-                        return po;
+                        nextc = c;
                     }
-                }
-                else
-                {
-                    po.add(pbo);
-                }
-                skipSpaces();
-            }
-            // read ']'
-            source.read();
-            skipSpaces();
-            return po;
-        }
-        finally
-        {
-            recursionDepth--;
-        }
-    }
-
-    /**
-     * Determine if a character terminates a PDF name.
-     *
-     * @param ch The character
-     * @return true if the character terminates a PDF name, otherwise false.
-     */
-    protected static boolean isEndOfName(int ch)
-    {
-        switch (ch)
-        {
-        case ASCII_SPACE:
-        case ASCII_CR:
-        case ASCII_LF:
-        case ASCII_TAB:
-        case '>':
-        case '<':
-        case '[':
-        case '/':
-        case ']':
-        case ')':
-        case '(':
-        case ASCII_NULL:
-        case '\f':
-        case '%':
-        case -1:
-            return true;
-        default:
-            return false;
-        }
-    }
 
-    /**
-     * This will parse a PDF name from the stream.
-     *
-     * @return The parsed PDF name.
-     * @throws IOException If there is an error reading from the stream.
-     */
-    protected COSName parseCOSName() throws IOException
-    {
-        readExpectedChar('/');
-        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
-        int c = source.read();
-        while (!isEndOfName(c))
-        {
-            final int ch = c;
-            if (ch == '#')
-            {
-                int ch1 = source.read();
-                int ch2 = source.read();
-                // Prior to PDF v1.2, the # was not a special character.  Also,
-                // it has been observed that various PDF tools do not follow 
the
-                // spec with respect to the # escape, even though they report
-                // PDF versions of 1.2 or later.  The solution here is that we
-                // interpret the # as an escape only when it is followed by two
-                // valid hex digits.
-                if (isHexDigit((char)ch1) && isHexDigit((char)ch2))
-                {
-                    String hex = Character.toString((char) ch1) + (char) ch2;
+                    int character = 0;
                     try
                     {
-                        buffer.write(Integer.parseInt(hex, 16));
+                        character = Integer.parseInt(octal.toString(), 8);
                     }
                     catch (NumberFormatException e)
                     {
-                        throw new IOException("Error: expected hex digit, 
actual='" + hex + "'", e);
+                        throw new IOException(
+                                "Error: Expected octal character, actual='" + 
octal + "'", e);
                     }
-                    c = source.read();
-                }
-                else
-                {
-                    // check for premature EOF
-                    if (ch2 == -1 || ch1 == -1)
-                    {
-                        LOG.error("Premature EOF in BaseParser#parseCOSName");
-                        c = -1;
-                        break;
-                    }
-                    source.rewind(1);
-                    c = ch1;
-                    buffer.write(ch);
+                    out.write(character);
+                    break;
+                default:
+                    // dropping the backslash
+                    // see 7.3.4.2 Literal Strings for further information
+                    out.write(next);
                 }
             }
             else
             {
-                buffer.write(ch);
-                c = source.read();
+                out.write(ch);
             }
-        }
-        if (c != -1)
-        {
-            source.rewind(1);
-        }
-
-        return COSName.getPDFName(decodeBuffer(buffer));
-    }
-
-    /**
-     * Tries to decode the buffer content to an UTF-8 String. If that fails, 
tries the alternative Encoding.
-     * 
-     * @param buffer the {@link ByteArrayOutputStream} containing the bytes to 
decode
-     * @return the decoded String
-     */
-    private String decodeBuffer(ByteArrayOutputStream buffer)
-    {
-        try
-        {
-            return 
utf8Decoder.decode(ByteBuffer.wrap(buffer.toByteArray())).toString();
-        }
-        catch (CharacterCodingException e)
-        {
-            // some malformed PDFs don't use UTF-8 see PDFBOX-3347
-            LOG.debug(() -> "Buffer could not be decoded using 
StandardCharsets.UTF_8 - trying " + 
-                    ALTERNATIVE_CHARSET.name(), e);
-            return buffer.toString(ALTERNATIVE_CHARSET);
-        }
-    }
-    
-    /**
-     * This will parse a directory object from the stream.
-     *
-     * @return The parsed object.
-     *
-     * @throws IOException If there is an error during parsing.
-     */
-    protected COSBase parseDirObject() throws IOException
-    {
-        try
-        {
-            recursionDepth++;
-            if (recursionDepth > MAX_RECURSION_DEPTH)
+            if (nextc != -2)
             {
-                throw new IOException(MAX_RECUSRION_MSG);
+                c = nextc;
             }
-            skipSpaces();
-            char c = (char) source.peek();
-            switch (c)
+            else
             {
-            case '<':
-                // pull off first left bracket
-                source.read();
-                // check for second left bracket
-                c = (char) source.peek();
-                source.rewind(1);
-                return c == '<' ? parseCOSDictionary(true) : parseCOSString();
-            case '[':
-                // array
-                return parseCOSArray();
-            case '(':
-                return parseCOSString();
-            case '/':
-                // name
-                return parseCOSName();
-            case 'n':
-                // null
-                readExpectedString(NULL, false);
-                return COSNull.NULL;
-            case 't':
-                readExpectedString(TRUE, false);
-                return COSBoolean.TRUE;
-            case 'f':
-                readExpectedString(FALSE, false);
-                return COSBoolean.FALSE;
-            case 'R':
-                source.read();
-                return new COSObject(null);
-            case (char) -1:
-                return null;
-            default:
-                if (isDigit(c) || c == '-' || c == '+' || c == '.')
-                {
-                    return parseCOSNumber();
-                }
-                // This is not suppose to happen, but we will allow for it
-                // so we are more compatible with POS writers that don't
-                // follow the spec
-                long startOffset = source.getPosition();
-                String badString = readString();
-                if (badString.isEmpty())
-                {
-                    int peek = source.peek();
-                    // we can end up in an infinite loop otherwise
-                    throw new IOException("Unknown dir object c='" + c + "' 
cInt=" + (int) c + " peek='"
-                            + (char) peek + "' peekInt=" + peek + " at offset 
" + source.getPosition()
-                            + " (start offset: " + startOffset + ")");
-                }
-
-                // if it's an endstream/endobj, we want to put it back so the 
caller will see it
-                if (ENDOBJ_STRING.equals(badString) || 
ENDSTREAM_STRING.equals(badString))
-                {
-                    
source.rewind(badString.getBytes(StandardCharsets.ISO_8859_1).length);
-                }
-                else
-                {
-                    LOG.warn("Skipped unexpected dir object = '{}' at offset 
{} (start offset: {})",
-                            badString, source.getPosition(), startOffset);
-                    return this instanceof PDFStreamParser ? null : 
COSNull.NULL;
-                }
+                c = source.read();
             }
-            return null;
-        }
-        finally
-        {
-            recursionDepth--;
-        }
-    }
-
-    private COSNumber parseCOSNumber() throws IOException
-    {
-        StringBuilder buf = new StringBuilder();
-        int ic = source.read();
-        char c = (char) ic;
-        while (Character.isDigit(c) || c == '-' || c == '+' || c == '.' || c 
== 'E' || c == 'e')
-        {
-            buf.append(c);
-            ic = source.read();
-            c = (char) ic;
-        }
-        if (ic != -1)
-        {
-            source.rewind(1);
-        }
-
-        // PDFBOX-5025: catch "74191endobj"
-        char lastc = buf.charAt(buf.length() - 1);
-        if (lastc == 'e' || lastc == 'E')
-        {
-            buf.deleteCharAt(buf.length() - 1);
-            source.rewind(1);
-        }
-
-        return COSNumber.get(buf.toString());
-    }
-
-    /**
-     * This will read the next string from the stream.
-     *
-     * @return The string that was read from the stream, never null.
-     *
-     * @throws IOException If there is an error reading from the stream.
-     */
-    protected String readString() throws IOException
-    {
-        skipSpaces();
-        StringBuilder buffer = new StringBuilder();
-        int c = source.read();
-        while (!isEndOfName(c))
-        {
-            buffer.append( (char)c );
-            c = source.read();
         }
         if (c != -1)
         {
             source.rewind(1);
         }
-        return buffer.toString();
+        return out.toByteArray();
     }
-    
+
     /**
      * Reads given pattern from {@link #source}. Skipping whitespace at start 
and end if wanted.
      * 
@@ -1147,43 +431,6 @@ public abstract class BaseParser
     }
 
     /**
-     * This will read bytes until the first end of line marker occurs.
-     * NOTE: The EOL marker may consists of 1 (CR or LF) or 2 (CR and CL) bytes
-     * which is an important detail if one wants to unread the line.
-     *
-     * @return The characters between the current position and the end of the 
line.
-     *
-     * @throws IOException If there is an error reading from the stream.
-     */
-    protected String readLine() throws IOException
-    {
-        if (source.isEOF())
-        {
-            throw new IOException( "Error: End-of-File, expected line at 
offset " +
-                    source.getPosition());
-        }
-
-        StringBuilder buffer = new StringBuilder( 11 );
-
-        int c;
-        while ((c = source.read()) != -1)
-        {
-            // CR and LF are valid EOLs
-            if (isEOL(c))
-            {
-                break;
-            }
-            buffer.append( (char)c );
-        }
-        // CR+LF is also a valid EOL 
-        if (isCR(c) && isLF(source.peek()))
-        {
-            source.read();
-        }
-        return buffer.toString();
-    }
-
-    /**
      * This will tell if the end of the data is reached.
      * 
      * @return true if the end of the data is reached.
@@ -1211,7 +458,7 @@ public abstract class BaseParser
      * @param c The character to check against line feed
      * @return true if the next byte is 0x0A.
      */
-    private static boolean isLF(int c)
+    protected static boolean isLF(int c)
     {
         return ASCII_LF == c;
     }
@@ -1222,7 +469,7 @@ public abstract class BaseParser
      * @param c The character to check against carriage return
      * @return true if the next byte is 0x0D.
      */
-    private static boolean isCR(int c)
+    protected static boolean isCR(int c)
     {
         return ASCII_CR == c;
     }
@@ -1339,41 +586,6 @@ public abstract class BaseParser
     }
 
     /**
-     * This will read a long from the Stream and throw an {@link IOException} 
if
-     * the long value is negative or has more than 10 digits (i.e. : bigger 
than
-     * {@link #OBJECT_NUMBER_THRESHOLD})
-     *
-     * @return the object number being read.
-     * @throws IOException if an I/O error occurs
-     */
-    protected long readObjectNumber() throws IOException
-    {
-        long retval = readLong();
-        if (retval < 0 || retval >= OBJECT_NUMBER_THRESHOLD)
-        {
-            throw new IOException("Object Number '" + retval + "' has more 
than 10 digits or is negative");
-        }
-        return retval;
-    }
-
-    /**
-     * This will read a integer from the Stream and throw an {@link 
IllegalArgumentException} if the integer value
-     * has more than the maximum object revision (i.e. : bigger than {@link 
#GENERATION_NUMBER_THRESHOLD})
-     * @return the generation number being read.
-     * @throws IOException if an I/O error occurs
-     */
-    protected int readGenerationNumber() throws IOException
-    {
-        int retval = readInt();
-        if(retval < 0 || retval > GENERATION_NUMBER_THRESHOLD)
-        {
-            throw new IOException(
-                    "Generation Number '" + retval + "' has more than 5 digits 
or is negative");
-        }
-        return retval;
-    }
-    
-    /**
      * This will read an integer from the stream.
      *
      * @return The integer that was read from the stream.
@@ -1400,7 +612,6 @@ public abstract class BaseParser
         }
         return retval;
     }
-    
 
     /**
      * This will read an long from the stream.
@@ -1455,4 +666,5 @@ public abstract class BaseParser
         }
         return buffer;
     }
+
 }

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java    
    Fri Oct 31 04:20:03 2025        (r1929432)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/COSParser.java    
    Fri Oct 31 07:16:27 2025        (r1929433)
@@ -16,8 +16,14 @@
  */
 package org.apache.pdfbox.pdfparser;
 
+import java.io.ByteArrayOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CodingErrorAction;
 import java.nio.charset.StandardCharsets;
 import java.security.GeneralSecurityException;
 import java.security.KeyStore;
@@ -31,14 +37,17 @@ import org.apache.logging.log4j.Logger;
 import org.apache.logging.log4j.LogManager;
 import org.apache.pdfbox.cos.COSArray;
 import org.apache.pdfbox.cos.COSBase;
+import org.apache.pdfbox.cos.COSBoolean;
 import org.apache.pdfbox.cos.COSDictionary;
 import org.apache.pdfbox.cos.COSDocument;
+import org.apache.pdfbox.cos.COSInteger;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSNull;
 import org.apache.pdfbox.cos.COSNumber;
 import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.cos.COSObjectKey;
 import org.apache.pdfbox.cos.COSStream;
+import org.apache.pdfbox.cos.COSString;
 import org.apache.pdfbox.cos.ICOSParser;
 import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.io.RandomAccessRead;
@@ -61,20 +70,62 @@ import org.apache.pdfbox.pdmodel.encrypt
  */
 public class COSParser extends BaseParser implements ICOSParser
 {
+    private static final Logger LOG = LogManager.getLogger(COSParser.class);
+
     private static final String PDF_HEADER = "%PDF-";
     private static final String FDF_HEADER = "%FDF-";
     
     private static final String PDF_DEFAULT_VERSION = "1.4";
     private static final String FDF_DEFAULT_VERSION = "1.0";
 
+    private static final int E = 'e';
+    private static final int N = 'n';
+    private static final int D = 'd';
+
+    private static final int S = 's';
+    private static final int T = 't';
+    private static final int R = 'r';
+    private static final int A = 'a';
+    private static final int M = 'm';
+
+    private static final int O = 'o';
+    private static final int B = 'b';
+    private static final int J = 'j';
+
+    /**
+     * This is a string constant that will be used for comparisons.
+     */
+    private static final String ENDOBJ_STRING = "endobj";
+    /**
+     * This is a string constant that will be used for comparisons.
+     */
+    private static final String ENDSTREAM_STRING = "endstream";
+    /**
+     * This is a string constant that will be used for comparisons.
+     */
+    private static final String STREAM_STRING = "stream";
+
     private static final char[] STARTXREF = { 
's','t','a','r','t','x','r','e','f' };
 
     private static final byte[] ENDSTREAM = { E, N, D, S, T, R, E, A, M };
 
     private static final byte[] ENDOBJ = { E, N, D, O, B, J };
+    /**
+     * This is a string constant that will be used for comparisons.
+     */
+    private static final char[] TRUE = { 't', 'r', 'u', 'e' };
+    /**
+     * This is a string constant that will be used for comparisons.
+     */
+    private static final char[] FALSE = { 'f', 'a', 'l', 's', 'e' };
+    /**
+     * This is a string constant that will be used for comparisons.
+     */
+    private static final char[] NULL = { 'n', 'u', 'l', 'l' };
+
+    private static final long OBJECT_NUMBER_THRESHOLD = 10000000000L;
+    private static final long GENERATION_NUMBER_THRESHOLD = 65535;
 
-    protected static final long MINIMUM_SEARCH_OFFSET = 6;
-    
     private static final int STRMBUFLEN = 2048;
     private final byte[] strmBuf = new byte[ STRMBUFLEN ];
 
@@ -84,6 +135,30 @@ public class COSParser extends BaseParse
     private String password = "";
     private String keyAlias = null;
 
+    private static final Charset ALTERNATIVE_CHARSET;
+
+    static
+    {
+        Charset cs;
+        String charsetName = "Windows-1252";
+        try
+        {
+            cs = Charset.forName(charsetName);
+        }
+        catch (IllegalArgumentException | UnsupportedOperationException e)
+        {
+            cs = StandardCharsets.ISO_8859_1;
+            LOG.warn(() -> "Charset is not supported: " + charsetName + ", 
falling back to "
+                    + StandardCharsets.ISO_8859_1.name(), e);
+        }
+        ALTERNATIVE_CHARSET = cs;
+    }
+
+    // CharSetDecoders are not threadsafe so not static
+    private final CharsetDecoder utf8Decoder = 
StandardCharsets.UTF_8.newDecoder()
+            .onMalformedInput(CodingErrorAction.REPORT)
+            .onUnmappableCharacter(CodingErrorAction.REPORT);
+
     /**
      * The range within the %%EOF marker will be searched.
      * Useful if there are additional characters after %%EOF within the PDF. 
@@ -114,6 +189,12 @@ public class COSParser extends BaseParse
      */
     private boolean isLenient = true;
 
+    private static final int MAX_RECURSION_DEPTH = 500;
+    private static final String MAX_RECUSRION_MSG = //
+            "Reached maximum recursion depth " + 
Integer.toString(MAX_RECURSION_DEPTH);
+
+    private int recursionDepth = 0;
+
     protected boolean initialParseDone = false;
 
     private boolean trailerWasRebuild = false;
@@ -121,7 +202,14 @@ public class COSParser extends BaseParse
     private BruteForceParser bruteForceParser = null;
     private PDEncryption encryption = null;
     private final Map<COSObjectKey, Long> xrefTable = new HashMap<>();
-    
+
+    private final Map<Long, COSObjectKey> keyCache = new HashMap<>();
+
+    /**
+     * This is the document that will be parsed.
+     */
+    protected COSDocument document;
+
     /**
      * Intermediate cache. Contains all objects of already read compressed 
object streams. Objects are removed after
      * dereferencing them.
@@ -138,8 +226,6 @@ public class COSParser extends BaseParse
      */
     private int readTrailBytes = DEFAULT_TRAIL_BYTECOUNT; 
 
-    private static final Logger LOG = LogManager.getLogger(COSParser.class);
-
     /**
      * Default constructor.
      *
@@ -697,18 +783,428 @@ public class COSParser extends BaseParse
     }
     
     /**
-     * This will read a COSStream from the input stream using length attribute 
within dictionary. If
-     * length attribute is a indirect reference it is first resolved to get 
the stream length. This
-     * means we copy stream data without testing for 'endstream' or 'endobj' 
and thus it is no
-     * problem if these keywords occur within stream. We require 'endstream' 
to be found after
-     * stream data is read.
+     * This will parse a PDF array object.
+     *
+     * @return The parsed PDF array.
+     *
+     * @throws IOException If there is an error parsing the stream.
+     */
+    protected COSArray parseCOSArray() throws IOException
+    {
+        try
+        {
+            recursionDepth++;
+            if (recursionDepth > MAX_RECURSION_DEPTH)
+            {
+                throw new IOException(MAX_RECUSRION_MSG);
+            }
+            long startPosition = source.getPosition();
+            readExpectedChar('[');
+            COSArray po = new COSArray();
+            COSBase pbo;
+            skipSpaces();
+            int i;
+            while (((i = source.peek()) > 0) && ((char) i != ']'))
+            {
+                pbo = parseDirObject();
+                if (pbo instanceof COSObject)
+                {
+                    // the current empty COSObject is replaced with the 
correct one
+                    pbo = null;
+                    // We have to check if the expected values are there or 
not PDFBOX-385
+                    if (po.size() > 1 && po.get(po.size() - 1) instanceof 
COSInteger)
+                    {
+                        COSInteger genNumber = (COSInteger) 
po.remove(po.size() - 1);
+                        if (po.size() > 0 && po.get(po.size() - 1) instanceof 
COSInteger)
+                        {
+                            COSInteger number = (COSInteger) 
po.remove(po.size() - 1);
+                            if (number.longValue() >= 0 && 
genNumber.intValue() >= 0)
+                            {
+                                COSObjectKey key = 
getObjectKey(number.longValue(),
+                                        genNumber.intValue());
+                                pbo = getObjectFromPool(key);
+                            }
+                            else
+                            {
+                                LOG.warn("Invalid value(s) for an object key 
{} {}",
+                                        number.longValue(), 
genNumber.intValue());
+                            }
+                        }
+                    }
+                }
+                // no element was produced: parseDirObject() returned null or the 'R' token could not be resolved
+                if (pbo == null)
+                {
+                    // it could be a bad object in the array which is just 
skipped
+                    LOG.warn("Corrupt array element at offset {}, start 
offset: {}",
+                            source.getPosition(), startPosition);
+                    String isThisTheEnd = readString();
+                    // return immediately if a corrupt element is followed by 
another array
+                    // to avoid a possible infinite recursion as most likely 
the whole array is corrupted
+                    if (isThisTheEnd.isEmpty() && source.peek() == '[')
+                    {
+                        return po;
+                    }
+                    
source.rewind(isThisTheEnd.getBytes(StandardCharsets.ISO_8859_1).length);
+                    // This could also be an "endobj" or "endstream" which 
means we can assume that
+                    // the array has ended.
+                    if (ENDOBJ_STRING.equals(isThisTheEnd) || 
ENDSTREAM_STRING.equals(isThisTheEnd))
+                    {
+                        return po;
+                    }
+                }
+                else
+                {
+                    po.add(pbo);
+                }
+                skipSpaces();
+            }
+            // consume the closing ']' (this read is unconditional, even if the loop ended without seeing one)
+            source.read();
+            skipSpaces();
+            return po;
+        }
+        finally
+        {
+            recursionDepth--;
+        }
+    }
+
+    /**
+     * Parses a PDF dictionary, i.e. a sequence of name/value pairs enclosed in "&lt;&lt;" and "&gt;&gt;".
+     * <p>
+     * The parser is lenient: if a pair cannot be read or the dictionary can't be recovered after unexpected
+     * content, the entries collected so far are returned. A missing end marker is only logged.
+     *
+     * @param isDirect indicates whether the dictionary to be read is a direct object
+     * @return The parsed dictionary, never null.
+     *
+     * @throws IOException If there is an error reading the stream or the maximum recursion depth is exceeded.
+     */
+    protected COSDictionary parseCOSDictionary(boolean isDirect) throws IOException
+    {
+        try
+        {
+            recursionDepth++;
+            if (recursionDepth > MAX_RECURSION_DEPTH)
+            {
+                throw new IOException(MAX_RECUSRION_MSG);
+            }
+            // opening marker "<<"
+            readExpectedChar('<');
+            readExpectedChar('<');
+            skipSpaces();
+            COSDictionary dictionary = new COSDictionary();
+            dictionary.setDirect(isDirect);
+            boolean endMarkerAhead = false;
+            while (!endMarkerAhead)
+            {
+                skipSpaces();
+                char next = (char) source.peek();
+                switch (next)
+                {
+                case '>':
+                    endMarkerAhead = true;
+                    break;
+                case '/':
+                    // a failed pair most likely means a corrupted dictionary:
+                    // stop immediately and hand back everything read so far
+                    if (!parseCOSDictionaryNameValuePair(dictionary))
+                    {
+                        return dictionary;
+                    }
+                    break;
+                default:
+                    // a /Name was expected here, skip ahead until the end or until parsing can resume
+                    LOG.warn("Invalid dictionary, found: '{}' but expected: '/' at offset {}", next,
+                            source.getPosition());
+                    if (readUntilEndOfCOSDictionary())
+                    {
+                        // recovery wasn't possible
+                        return dictionary;
+                    }
+                    break;
+                }
+            }
+            // closing marker ">>", its absence is tolerated
+            try
+            {
+                readExpectedChar('>');
+                readExpectedChar('>');
+            }
+            catch (IOException e)
+            {
+                LOG.warn("Invalid dictionary, can't find end of dictionary at offset {}",
+                        source.getPosition());
+            }
+            return dictionary;
+        }
+        finally
+        {
+            recursionDepth--;
+        }
+    }
+
+    /**
+     * Reads one name/value pair and stores it in the given dictionary.
+     *
+     * @param obj the dictionary receiving the pair
+     * @return false if no value could be read, true otherwise (including the case of a skipped out of range
+     * integer value)
+     * @throws IOException If there is an error reading the stream.
+     */
+    private boolean parseCOSDictionaryNameValuePair(COSDictionary obj) throws IOException
+    {
+        COSName name = parseCOSName();
+        boolean nameMissing = name == null || name.getName().isEmpty();
+        if (nameMissing)
+        {
+            // only reported, the value is still read
+            LOG.warn("Empty COSName at offset {}", source.getPosition());
+        }
+        COSBase parsedValue = parseCOSDictionaryValue();
+        skipSpaces();
+        if (parsedValue == null)
+        {
+            LOG.warn("Bad dictionary declaration at offset {}", source.getPosition());
+            return false;
+        }
+        boolean outOfRange = parsedValue instanceof COSInteger && !((COSInteger) parsedValue).isValid();
+        if (outOfRange)
+        {
+            LOG.warn("Skipped out of range number value at offset {}", source.getPosition());
+            return true;
+        }
+        // label this item as direct, to avoid signature problems.
+        parsedValue.setDirect(true);
+        obj.setItem(name, parsedValue);
+        return true;
+    }
+
+    /**
+     * Reads a numeric token (digits, signs, dot and exponent markers) from the source and converts it using
+     * {@link COSNumber#get(String)}. The byte terminating the token is pushed back.
+     *
+     * @return the parsed number
+     * @throws IOException If there is an error reading the stream.
+     */
+    private COSNumber parseCOSNumber() throws IOException
+    {
+        StringBuilder token = new StringBuilder();
+        int raw;
+        while (true)
+        {
+            raw = source.read();
+            char current = (char) raw;
+            boolean partOfNumber = Character.isDigit(current) || current == '-' || current == '+'
+                    || current == '.' || current == 'E' || current == 'e';
+            if (!partOfNumber)
+            {
+                break;
+            }
+            token.append(current);
+        }
+        // hand back the terminating byte unless the end of the data was reached
+        if (raw != -1)
+        {
+            source.rewind(1);
+        }
+
+        // PDFBOX-5025: catch "74191endobj"
+        int lastIndex = token.length() - 1;
+        char tail = token.charAt(lastIndex);
+        if (tail == 'e' || tail == 'E')
+        {
+            // drop the trailing exponent marker and make it available to the next read
+            token.setLength(lastIndex);
+            source.rewind(1);
+        }
+
+        return COSNumber.get(token.toString());
+    }
+
+    /**
+     * This will parse a PDF dictionary value. The value is either a single object as read by
+     * {@link #parseDirObject()} or, if a number is followed by another digit, an indirect reference of the form
+     * "objectNumber generationNumber R" which is resolved using the object pool.
+     *
+     * @return The object returned by {@link #parseDirObject()} (possibly null), the referenced object from the
+     * pool, or {@link COSNull#NULL} if the reference consists of invalid numbers.
+     *
+     * @throws IOException If there is an error parsing the dictionary object, e.g. if the expected 'R' is missing.
+     */
+    private COSBase parseCOSDictionaryValue() throws IOException
+    {
+        long numOffset = source.getPosition();
+        COSBase value = parseDirObject();
+        skipSpaces();
+        // proceed if the given object is a number and the following is a number as well
+        if (!(value instanceof COSNumber) || !isDigit())
+        {
+            return value;
+        }
+        // read the remaining information of the object number
+        long genOffset = source.getPosition();
+        COSBase generationNumber = parseDirObject();
+        skipSpaces();
+        readExpectedChar('R');
+        if (!(value instanceof COSInteger))
+        {
+            LOG.error("expected number, actual={} at offset {}", value, numOffset);
+            return COSNull.NULL;
+        }
+        if (!(generationNumber instanceof COSInteger))
+        {
+            LOG.error("expected number, actual={} at offset {}", generationNumber, genOffset);
+            return COSNull.NULL;
+        }
+        long objNumber = ((COSInteger) value).longValue();
+        if (objNumber <= 0)
+        {
+            LOG.warn("invalid object number value ={} at offset {}", objNumber, numOffset);
+            return COSNull.NULL;
+        }
+        int genNumber = ((COSInteger) generationNumber).intValue();
+        if (genNumber < 0)
+        {
+            // report the position of the generation number itself, not the one of the object number
+            LOG.error("invalid generation number value ={} at offset {}", genNumber, genOffset);
+            return COSNull.NULL;
+        }
+        // dereference the object
+        return getObjectFromPool(getObjectKey(objNumber, genNumber));
+    }
+
+    /**
+     * This will parse a direct object from the stream. After skipping spaces the type of the object is chosen by
+     * the next byte: "&lt;&lt;" starts a dictionary, a single "&lt;" a hex string, "[" an array, "(" a literal
+     * string, "/" a name, "n", "t" and "f" the keywords null, true and false, and a digit, sign or dot a number.
+     * A single "R" is consumed and returned as an empty {@link COSObject}.
+     * <p>
+     * Anything else is read as a token: an empty token raises an exception, "endobj" and "endstream" are pushed
+     * back so that the caller will see them, any other token is skipped with a warning.
+     *
+     * @return The parsed object. null is returned at the end of the data and for a pushed back "endobj" or
+     * "endstream". A skipped token yields null for a PDFStreamParser and {@link COSNull#NULL} otherwise.
+     *
+     * @throws IOException If there is an error during parsing or the maximum recursion depth is exceeded.
+     */
+    protected COSBase parseDirObject() throws IOException
+    {
+        try
+        {
+            recursionDepth++;
+            if (recursionDepth > MAX_RECURSION_DEPTH)
+            {
+                throw new IOException(MAX_RECUSRION_MSG);
+            }
+            skipSpaces();
+            char c = (char) source.peek();
+            switch (c)
+            {
+            case '<':
+                // consume the first '<'
+                source.read();
+                // a second '<' means a dictionary, anything else is treated as a hex string
+                c = (char) source.peek();
+                if (c == '<')
+                {
+                    source.rewind(1);
+                    return parseCOSDictionary(true);
+                }
+                else
+                {
+                    return parseCOSHexString();
+                }
+            case '[':
+                // array
+                return parseCOSArray();
+            case '(':
+                return parseCOSLiteralString();
+            case '/':
+                // name
+                return parseCOSName();
+            case 'n':
+                // null
+                readExpectedString(NULL, false);
+                return COSNull.NULL;
+            case 't':
+                readExpectedString(TRUE, false);
+                return COSBoolean.TRUE;
+            case 'f':
+                readExpectedString(FALSE, false);
+                return COSBoolean.FALSE;
+            case 'R':
+                source.read();
+                return new COSObject(null);
+            case (char) -1:
+                return null;
+            default:
+                if (isDigit(c) || c == '-' || c == '+' || c == '.')
+                {
+                    return parseCOSNumber();
+                }
+                // This is not supposed to happen, but we will allow for it
+                // so we are more compatible with PDF writers that don't
+                // follow the spec
+                long startOffset = source.getPosition();
+                String badString = readString();
+                if (badString.isEmpty())
+                {
+                    int peek = source.peek();
+                    // we can end up in an infinite loop otherwise
+                    throw new IOException("Unknown dir object c='" + c + "' 
cInt=" + (int) c
+                            + " peek='" + (char) peek + "' peekInt=" + peek + 
" at offset "
+                            + source.getPosition() + " (start offset: " + 
startOffset + ")");
+                }
+
+                // if it's an endstream/endobj, we want to put it back so the 
caller will see it
+                if (ENDOBJ_STRING.equals(badString) || 
ENDSTREAM_STRING.equals(badString))
+                {
+                    
source.rewind(badString.getBytes(StandardCharsets.ISO_8859_1).length);
+                }
+                else
+                {
+                    LOG.warn("Skipped unexpected dir object = '{}' at offset 
{} (start offset: {})",
+                            badString, source.getPosition(), startOffset);
+                    return this instanceof PDFStreamParser ? null : 
COSNull.NULL;
+                }
+            }
+            return null;
+        }
+        finally
+        {
+            recursionDepth--;
+        }
+    }
+
+    /**
+     * Looks up the object for the given key by delegating to the object pool of {@link #document}.
+     *
+     * @param key the key of the referenced object
+     * @return the result of the lookup in the document's object pool
+     * @throws IOException if there is no document to resolve the reference against
+     */
+    private COSBase getObjectFromPool(COSObjectKey key) throws IOException
+    {
+        if (document != null)
+        {
+            return document.getObjectFromPool(key);
+        }
+        // without a document there is no pool the reference could be resolved against
+        String message = "object reference " + key + " at offset " + source.getPosition()
+                + " in content stream";
+        throw new IOException(message);
+    }
+
+    /**
+     * Keep reading until the end of the dictionary object or the file has been hit, or until a '/' or a '&gt;' has
+     * been found. The keywords "endstream" and "endobj" are treated as the end of the object.
+     * <p>
+     * Bytes inspected while looking for one of the keywords are pushed back if the keyword doesn't match, so
+     * that a '/' or '&gt;' directly following an 'e' is not skipped.
+     *
+     * @return true if the end of the object or the file has been found, false if not, i.e. that the caller can
+     * continue to parse the dictionary at the current position.
+     *
+     * @throws IOException if there is a reading error.
+     */
+    private boolean readUntilEndOfCOSDictionary() throws IOException
+    {
+        int c = source.read();
+        while (c != -1 && c != '/' && c != '>')
+        {
+            // in addition to stopping when we find / or >, we also want
+            // to stop when we find endstream or endobj.
+            if (c == E && (consumeKeywordRemainder(ENDSTREAM) || consumeKeywordRemainder(ENDOBJ)))
+            {
+                // we're done reading this object!
+                return true;
+            }
+            c = source.read();
+        }
+        if (c == -1)
+        {
+            return true;
+        }
+        // put the '/' or '>' back so that the caller will see it
+        source.rewind(1);
+        return false;
+    }
+
+    /**
+     * Checks whether the following bytes complete the given keyword whose first byte has already been read.
+     * On success the keyword is consumed, otherwise all bytes read by this method are pushed back.
+     *
+     * @param keyword the complete keyword including its already consumed first byte
+     * @return true if the remainder of the keyword has been found and consumed
+     * @throws IOException if there is a reading error.
+     */
+    private boolean consumeKeywordRemainder(byte[] keyword) throws IOException
+    {
+        int consumed = 0;
+        for (int i = 1; i < keyword.length; i++)
+        {
+            int next = source.read();
+            if (next != -1)
+            {
+                consumed++;
+            }
+            if (next != keyword[i])
+            {
+                if (consumed > 0)
+                {
+                    source.rewind(consumed);
+                }
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * This will read a COSStream from the input stream using length attribute 
within dictionary. If length attribute is
+     * a indirect reference it is first resolved to get the stream length. 
This means we copy stream data without
+     * testing for 'endstream' or 'endobj' and thus it is no problem if these 
keywords occur within stream. We require
+     * 'endstream' to be found after stream data is read.
      *
      * @param dic dictionary that goes with this stream.
      *
      * @return parsed pdf stream.
      *
-     * @throws IOException if an error occurred reading the stream, like 
problems with reading
-     * length attribute, stream does not end with 'endstream' after data read, 
stream too short etc.
+     * @throws IOException if an error occurred reading the stream, like 
problems with reading length attribute, stream
+     * does not end with 'endstream' after data read, stream too short etc.
      */
     protected COSStream parseCOSStream(COSDictionary dic) throws IOException
     {
@@ -1050,6 +1546,78 @@ public class COSParser extends BaseParse
     }
 
     /**
+     * This will read a long from the Stream and throw an {@link IOException} if the long value is negative or has more
+     * than 10 digits (i.e. is not smaller than {@link #OBJECT_NUMBER_THRESHOLD})
+     *
+     * @return the object number being read.
+     * @throws IOException if an I/O error occurs
+     */
+    protected long readObjectNumber() throws IOException
+    {
+        long retval = readLong();
+        if (retval < 0 || retval >= OBJECT_NUMBER_THRESHOLD)
+        {
+            throw new IOException(
+                    "Object Number '" + retval + "' has more than 10 digits or 
is negative");
+        }
+        return retval;
+    }
+
+    /**
+     * This will read an integer from the Stream and throw an {@link IOException} if the integer value is negative or
+     * exceeds the maximum object revision (i.e. is bigger than {@link #GENERATION_NUMBER_THRESHOLD})
+     * 
+     * @return the generation number being read.
+     * @throws IOException if an I/O error occurs
+     */
+    protected int readGenerationNumber() throws IOException
+    {
+        int retval = readInt();
+        if (retval < 0 || retval > GENERATION_NUMBER_THRESHOLD)
+        {
+            throw new IOException(
+                    "Generation Number '" + retval + "' has more than 5 digits 
or is negative");
+        }
+        return retval;
+    }
+
+    /**
+     * This will read bytes until the first end of line marker occurs. NOTE: The EOL marker may consist of 1 (CR or LF)
+     * or 2 (CR and LF) bytes which is an important detail if one wants to unread the line.
+     *
+     * @return The characters between the current position and the end of the 
line.
+     *
+     * @throws IOException If there is an error reading from the stream.
+     */
+    protected String readLine() throws IOException
+    {
+        if (source.isEOF())
+        {
+            throw new IOException(
+                    "Error: End-of-File, expected line at offset " + 
source.getPosition());
+        }
+
+        StringBuilder buffer = new StringBuilder(11);
+
+        int c;
+        while ((c = source.read()) != -1)
+        {
+            // CR and LF are valid EOLs
+            if (isEOL(c))
+            {
+                break;
+            }
+            buffer.append((char) c);
+        }
+        // CR+LF is also a valid EOL
+        if (isCR(c) && isLF(source.peek()))
+        {
+            source.read();
+        }
+        return buffer.toString();
+    }
+
+    /**
      * Parse the header of a pdf.
      * 
      * @return true if a PDF header was found
@@ -1253,4 +1821,202 @@ public class COSParser extends BaseParse
     {
         return securityHandler;
     }
+
+    /**
+     * This will parse a PDF name from the stream.
+     *
+     * @return The parsed PDF name.
+     * @throws IOException If there is an error reading from the stream.
+     */
+    protected COSName parseCOSName() throws IOException
+    {
+        readExpectedChar('/');
+        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+        int c = source.read();
+        while (!isEndOfName(c))
+        {
+            final int ch = c;
+            if (ch == '#')
+            {
+                int ch1 = source.read();
+                int ch2 = source.read();
+                // Prior to PDF v1.2, the # was not a special character. Also,
+                // it has been observed that various PDF tools do not follow 
the
+                // spec with respect to the # escape, even though they report
+                // PDF versions of 1.2 or later. The solution here is that we
+                // interpret the # as an escape only when it is followed by two
+                // valid hex digits.
+                if (isHexDigit((char) ch1) && isHexDigit((char) ch2))
+                {
+                    String hex = Character.toString((char) ch1) + (char) ch2;
+                    try
+                    {
+                        buffer.write(Integer.parseInt(hex, 16));
+                    }
+                    catch (NumberFormatException e)
+                    {
+                        throw new IOException("Error: expected hex digit, 
actual='" + hex + "'", e);
+                    }
+                    c = source.read();
+                }
+                else
+                {
+                    // check for premature EOF
+                    if (ch2 == -1 || ch1 == -1)
+                    {
+                        LOG.error("Premature EOF in BaseParser#parseCOSName");
+                        c = -1;
+                        break;
+                    }
+                    source.rewind(1);
+                    c = ch1;
+                    buffer.write(ch);
+                }
+            }
+            else
+            {
+                buffer.write(ch);
+                c = source.read();
+            }
+        }
+        if (c != -1)
+        {
+            source.rewind(1);
+        }
+
+        return COSName.getPDFName(decodeBuffer(buffer));
+    }
+
+    private static boolean isHexDigit(char ch)
+    {
+        return isDigit(ch) || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 
'F');
+    }
+
+    /**
+     * This will parse a PDF string.
+     *
+     * @return The parsed PDF string.
+     *
+     * @throws IOException If there is an error reading from the stream.
+     */
+    protected COSString parseCOSLiteralString() throws IOException
+    {
+        return new COSString(readLiteralString());
+    }
+
+    /**
+     * This will parse a PDF HEX string with fail-fast semantics, meaning that we stop if a disallowed character is
+     * found. This is necessary in order to detect malformed input and be able to skip to next object start.
+     *
+     * We assume starting '&lt;' was already read.
+     * 
+     * @return The parsed PDF string.
+     *
+     * @throws IOException If there is an error reading from the stream.
+     */
+    protected COSString parseCOSHexString() throws IOException
+    {
+        final StringBuilder sBuf = new StringBuilder();
+        while (true)
+        {
+            int c = source.read();
+            if (isHexDigit((char) c))
+            {
+                sBuf.append((char) c);
+            }
+            else if (c == '>')
+            {
+                break;
+            }
+            else if (c < 0)
+            {
+                throw new IOException("Missing closing bracket for hex string. 
Reached EOS.");
+            }
+            else if ((c == ' ') || (c == '\n') || (c == '\t') || (c == '\r') 
|| (c == '\b')
+                    || (c == '\f'))
+            {
+                continue;
+            }
+            else
+            {
+                // if invalid chars was found: discard last
+                // hex character if it is not part of a pair
+                if (sBuf.length() % 2 != 0)
+                {
+                    sBuf.deleteCharAt(sBuf.length() - 1);
+                }
+
+                // read till the closing bracket was found
+                do
+                {
+                    c = source.read();
+                } while (c != '>' && c >= 0);
+
+                // might have reached EOF while looking for the closing bracket
+                // this can happen for malformed PDFs only. Make sure that 
there is
+                // no endless loop.
+                if (c < 0)
+                {
+                    throw new IOException("Missing closing bracket for hex 
string. Reached EOS.");
+                }
+
+                // exit loop
+                break;
+            }
+        }
+        return COSString.parseHex(sBuf.toString());
+    }
+
+    /**
+     * Tries to decode the buffer content to a UTF-8 String. If that fails, tries the alternative Encoding.
+     * 
+     * @param buffer the {@link ByteArrayOutputStream} containing the bytes to 
decode
+     * @return the decoded String
+     */
+    private String decodeBuffer(ByteArrayOutputStream buffer)
+    {
+        try
+        {
+            return 
utf8Decoder.decode(ByteBuffer.wrap(buffer.toByteArray())).toString();
+        }
+        catch (CharacterCodingException e)
+        {
+            // some malformed PDFs don't use UTF-8 see PDFBOX-3347
+            LOG.debug(() -> "Buffer could not be decoded using 
StandardCharsets.UTF_8 - trying "
+                    + ALTERNATIVE_CHARSET.name(), e);
+            return buffer.toString(ALTERNATIVE_CHARSET);
+        }
+    }
+
+    /**
+     * Returns the object key for the given combination of object and 
generation number. The object key from the cross
+     * reference table/stream will be reused if available. Otherwise a newly 
created object will be returned.
+     * 
+     * @param num the given object number
+     * @param gen the given generation number
+     * 
+     * @return the COS object key
+     */
+    protected COSObjectKey getObjectKey(long num, int gen)
+    {
+        // return new COSObjectKey(num, gen);
+        if (document == null || document.getXrefTable().isEmpty())
+        {
+            return new COSObjectKey(num, gen);
+        }
+        // use a cache to get the COSObjectKey as iterating over the 
xref-table-map gets slow for big pdfs
+        // in the long run we have to overhaul the object pool or even better 
remove it
+        Map<COSObjectKey, Long> xrefTable = document.getXrefTable();
+        if (xrefTable.size() > keyCache.size())
+        {
+            for (COSObjectKey key : xrefTable.keySet())
+            {
+                keyCache.putIfAbsent(key.getInternalHash(), key);
+            }
+        }
+        long internalHashCode = COSObjectKey.computeInternalHash(num, gen);
+        COSObjectKey foundKey = keyCache.get(internalHashCode);
+        return foundKey != null ? foundKey : new COSObjectKey(num, gen);
+    }
+
 }

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java
    Fri Oct 31 04:20:03 2025        (r1929432)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFObjectStreamParser.java
    Fri Oct 31 07:16:27 2025        (r1929433)
@@ -34,7 +34,7 @@ import org.apache.pdfbox.cos.COSStream;
  * @author Ben Litchfield
  * 
  */
-public class PDFObjectStreamParser extends BaseParser
+public class PDFObjectStreamParser extends COSParser
 {
     private final int numberOfObjects;
     private final int firstObject;

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
  Fri Oct 31 04:20:03 2025        (r1929432)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/PDFStreamParser.java
  Fri Oct 31 07:16:27 2025        (r1929433)
@@ -39,7 +39,7 @@ import org.apache.pdfbox.io.RandomAccess
  *
  * @author Ben Litchfield
  */
-public class PDFStreamParser extends BaseParser
+public class PDFStreamParser extends COSParser
 {
     /**
      * Log instance.
@@ -66,8 +66,9 @@ public class PDFStreamParser extends Bas
      * Constructor.
      *
      * @param bytes the bytes to parse.
+     * @throws IOException If there is an error initializing the stream.
      */
-    public PDFStreamParser(byte[] bytes)
+    public PDFStreamParser(byte[] bytes) throws IOException
     {
         super(new RandomAccessReadBuffer(bytes));
     }
@@ -114,15 +115,13 @@ public class PDFStreamParser extends Bas
             case '<':
                 // pull off first left bracket
                 source.read();
-
                 // check for second left bracket
                 c = (char) source.peek();
 
-                // put back first bracket
-                source.rewind(1);
-
                 if (c == '<')
                 {
+                    // put back first bracket
+                    source.rewind(1);
                     try
                     {
                         return parseCOSDictionary(true);
@@ -137,7 +136,7 @@ public class PDFStreamParser extends Bas
                 }
                 else
                 {
-                    return parseCOSString();
+                    return parseCOSHexString();
                 }
             case '[':
                 // array
@@ -154,7 +153,7 @@ public class PDFStreamParser extends Bas
                 }
             case '(':
                 // string
-                return parseCOSString();
+                return parseCOSLiteralString();
             case '/':
                 // name
                 return parseCOSName();

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefParser.java
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefParser.java   
    Fri Oct 31 04:20:03 2025        (r1929432)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/XrefParser.java   
    Fri Oct 31 07:16:27 2025        (r1929433)
@@ -46,6 +46,8 @@ public class XrefParser
     private static final char[] XREF_TABLE = { 'x', 'r', 'e', 'f' };
     private static final char[] STARTXREF = { 's', 't', 'a', 'r', 't', 'x', 
'r', 'e', 'f' };
 
+    private static final long MINIMUM_SEARCH_OFFSET = 6;
+
     /** 
      * Collects all Xref/trailer objects and resolves them into single
      * object using startxref reference. 
@@ -476,7 +478,7 @@ public class XrefParser
             Map<COSObjectKey, Long> xrefOffset) throws IOException
     {
         // there can't be any object at the very beginning of a pdf
-        if (offset < COSParser.MINIMUM_SEARCH_OFFSET)
+        if (offset < MINIMUM_SEARCH_OFFSET)
         {
             return null;
         }

Modified: 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java
   Fri Oct 31 04:20:03 2025        (r1929432)
+++ 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java
   Fri Oct 31 07:16:27 2025        (r1929433)
@@ -24,72 +24,11 @@ import java.io.IOException;
 import java.io.InputStream;
 
 import org.apache.pdfbox.Loader;
-import org.apache.pdfbox.cos.COSString;
 import org.apache.pdfbox.io.RandomAccessReadBuffer;
 import org.junit.jupiter.api.Test;
 
 class TestBaseParser
 {
-    @Test
-    void testCheckForEndOfString() throws IOException
-    {
-        // (Test)
-        byte[] inputBytes = { 40, 84, 101, 115, 116, 41 };
-
-        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
-        BaseParser baseParser = new COSParser(buffer);
-        COSString cosString = baseParser.parseCOSString();
-        assertEquals("Test", cosString.getString());
-
-        String output = "(Test";
-        // ((Test) + LF + "/ "
-        inputBytes = new byte[] { '(', '(', 'T', 'e', 's', 't', ')', 10, '/', 
' ' };
-
-        buffer = new RandomAccessReadBuffer(inputBytes);
-        baseParser = new COSParser(buffer);
-        cosString = baseParser.parseCOSString();
-        assertEquals(output, cosString.getString());
-
-        // ((Test) + CR + "/ "
-        inputBytes = new byte[] { '(', '(', 'T', 'e', 's', 't', ')', 13, '/', 
' ' };
-
-        buffer = new RandomAccessReadBuffer(inputBytes);
-        baseParser = new COSParser(buffer);
-        cosString = baseParser.parseCOSString();
-        assertEquals(output, cosString.getString());
-
-        // ((Test) + CR + LF + "/ "
-        inputBytes = new byte[] { '(', '(', 'T', 'e', 's', 't', ')', 13, 10, 
'/' };
-
-        buffer = new RandomAccessReadBuffer(inputBytes);
-        baseParser = new COSParser(buffer);
-        cosString = baseParser.parseCOSString();
-        assertEquals(output, cosString.getString());
-
-        // ((Test) + LF + "> "
-        inputBytes = new byte[] { '(', '(', 'T', 'e', 's', 't', ')', 10, '>', 
' ' };
-
-        buffer = new RandomAccessReadBuffer(inputBytes);
-        baseParser = new COSParser(buffer);
-        cosString = baseParser.parseCOSString();
-        assertEquals(output, cosString.getString());
-
-        // ((Test) + CR + "> "
-        inputBytes = new byte[] { '(', '(', 'T', 'e', 's', 't', ')', 13, '>', 
' ' };
-
-        buffer = new RandomAccessReadBuffer(inputBytes);
-        baseParser = new COSParser(buffer);
-        cosString = baseParser.parseCOSString();
-        assertEquals(output, cosString.getString());
-
-        // ((Test) + CR + LF + "> "
-        inputBytes = new byte[] { '(', '(', 'T', 'e', 's', 't', ')', 13, 10, 
'>' };
-
-        buffer = new RandomAccessReadBuffer(inputBytes);
-        baseParser = new COSParser(buffer);
-        cosString = baseParser.parseCOSString();
-        assertEquals(output, cosString.getString());
-    }
 
     @Test
     void testBaseParserStackOverflow()

Added: 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestCOSParser.java
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestCOSParser.java
    Fri Oct 31 07:16:27 2025        (r1929433)
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.pdfbox.pdfparser;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+
+import java.io.IOException;
+
+import org.apache.pdfbox.cos.COSString;
+import org.apache.pdfbox.io.RandomAccessReadBuffer;
+import org.junit.jupiter.api.Test;
+
+class TestCOSParser
+{
+    @Test
+    void testCheckForEndOfString() throws IOException
+    {
+        // (Test)
+        byte[] inputBytes = { 40, 84, 101, 115, 116, 41 };
+
+        RandomAccessReadBuffer buffer = new RandomAccessReadBuffer(inputBytes);
+        COSParser cosParser = new COSParser(buffer);
+        COSString cosString = cosParser.parseCOSLiteralString();
+        assertEquals("Test", cosString.getString());
+
+        String output = "(Test";
+        // ((Test) + LF + "/ "
+        inputBytes = new byte[] { '(', '(', 'T', 'e', 's', 't', ')', 10, '/', 
' ' };
+
+        buffer = new RandomAccessReadBuffer(inputBytes);
+        cosParser = new COSParser(buffer);
+        cosString = cosParser.parseCOSLiteralString();
+        assertEquals(output, cosString.getString());
+
+        // ((Test) + CR + "/ "
+        inputBytes = new byte[] { '(', '(', 'T', 'e', 's', 't', ')', 13, '/', 
' ' };
+
+        buffer = new RandomAccessReadBuffer(inputBytes);
+        cosParser = new COSParser(buffer);
+        cosString = cosParser.parseCOSLiteralString();
+        assertEquals(output, cosString.getString());
+
+        // ((Test) + CR + LF + "/ "
+        inputBytes = new byte[] { '(', '(', 'T', 'e', 's', 't', ')', 13, 10, 
'/' };
+
+        buffer = new RandomAccessReadBuffer(inputBytes);
+        cosParser = new COSParser(buffer);
+        cosString = cosParser.parseCOSLiteralString();
+        assertEquals(output, cosString.getString());
+
+        // ((Test) + LF + "> "
+        inputBytes = new byte[] { '(', '(', 'T', 'e', 's', 't', ')', 10, '>', 
' ' };
+
+        buffer = new RandomAccessReadBuffer(inputBytes);
+        cosParser = new COSParser(buffer);
+        cosString = cosParser.parseCOSLiteralString();
+        assertEquals(output, cosString.getString());
+
+        // ((Test) + CR + "> "
+        inputBytes = new byte[] { '(', '(', 'T', 'e', 's', 't', ')', 13, '>', 
' ' };
+
+        buffer = new RandomAccessReadBuffer(inputBytes);
+        cosParser = new COSParser(buffer);
+        cosString = cosParser.parseCOSLiteralString();
+        assertEquals(output, cosString.getString());
+
+        // ((Test) + CR + LF + "> "
+        inputBytes = new byte[] { '(', '(', 'T', 'e', 's', 't', ')', 13, 10, 
'>' };
+
+        buffer = new RandomAccessReadBuffer(inputBytes);
+        cosParser = new COSParser(buffer);
+        cosString = cosParser.parseCOSLiteralString();
+        assertEquals(output, cosString.getString());
+    }
+
+}

svn commit: r1929433 - in pdfbox/trunk/pdfbox/src: main/java/org/apache/pdfbox/pdfparser test/java/org/apache/pdfbox/pdfparser

Reply via email to