Author: lehmi
Date: Sun Oct  5 11:47:07 2025
New Revision: 1928954

Log:
PDFBOX-6041: limit recursion depth to avoid a stack overflow exception as 
proposed by David Justamante

Added:
   
pdfbox/branches/3.0/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/PDFBOX-6041-example.pdf
   (contents, props changed)
Modified:
   
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
   
pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java

Modified: 
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
==============================================================================
--- 
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
        Sun Oct  5 11:34:39 2025        (r1928953)
+++ 
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdfparser/BaseParser.java
        Sun Oct  5 11:47:07 2025        (r1928954)
@@ -67,6 +67,12 @@ public abstract class BaseParser
 
     private static final Charset ALTERNATIVE_CHARSET;
 
+    private static final int MAX_RECURSION_DEPTH = 500;
+    private static final String MAX_RECUSRION_MSG = //
+            "Reached maximum recursion depth " + 
Integer.toString(MAX_RECURSION_DEPTH);
+    
+    private int recursionDepth = 0;
+
     private final Map<Long, COSObjectKey> keyCache = new HashMap<>();
 
     static
@@ -269,51 +275,63 @@ public abstract class BaseParser
      */
     protected COSDictionary parseCOSDictionary(boolean isDirect) throws 
IOException
     {
-        readExpectedChar('<');
-        readExpectedChar('<');
-        skipSpaces();
-        COSDictionary obj = new COSDictionary();
-        obj.setDirect(isDirect);
-        while (true)
-        {
-            skipSpaces();
-            char c = (char) source.peek();
-            if (c == '>')
-            {
-                break;
-            }
-            else if (c == '/')
-            {
-                // something went wrong, most likely the dictionary is 
corrupted
-                // stop immediately and return everything read so far
-                if (!parseCOSDictionaryNameValuePair(obj))
-                {
-                    return obj;
-                }
-            }
-            else
-            {
-                // invalid dictionary, we were expecting a /Name, read until 
the end or until we can recover
-                LOG.warn("Invalid dictionary, found: '" + c + "' but expected: 
'/' at offset "
-                        + source.getPosition());
-                if (readUntilEndOfCOSDictionary())
-                {
-                    // we couldn't recover
-                    return obj;
-                }
-            }
-        }
         try
         {
-            readExpectedChar('>');
-            readExpectedChar('>');
+            recursionDepth++;
+            if (recursionDepth > MAX_RECURSION_DEPTH)
+            {
+                throw new IOException(MAX_RECUSRION_MSG);
+            }
+               readExpectedChar('<');
+               readExpectedChar('<');
+               skipSpaces();
+               COSDictionary obj = new COSDictionary();
+               obj.setDirect(isDirect);
+               while (true)
+               {
+                   skipSpaces();
+                   char c = (char) source.peek();
+                   if (c == '>')
+                   {
+                       break;
+                   }
+                   else if (c == '/')
+                   {
+                       // something went wrong, most likely the dictionary is 
corrupted
+                       // stop immediately and return everything read so far
+                       if (!parseCOSDictionaryNameValuePair(obj))
+                       {
+                           return obj;
+                       }
+                   }
+                   else
+                   {
+                       // invalid dictionary, we were expecting a /Name, read 
until the end or until we can recover
+                       LOG.warn("Invalid dictionary, found: '" + c + "' but 
expected: '/' at offset "
+                               + source.getPosition());
+                       if (readUntilEndOfCOSDictionary())
+                       {
+                           // we couldn't recover
+                           return obj;
+                       }
+                   }
+               }
+               try
+               {
+                   readExpectedChar('>');
+                   readExpectedChar('>');
+               }
+               catch (IOException exception)
+               {
+                   LOG.warn("Invalid dictionary, can't find end of dictionary 
at offset "
+                           + source.getPosition());
+               }
+               return obj;
         }
-        catch (IOException exception)
+        finally
         {
-            LOG.warn("Invalid dictionary, can't find end of dictionary at 
offset "
-                    + source.getPosition());
+            recursionDepth--;
         }
-        return obj;
     }
 
     /**
@@ -745,71 +763,83 @@ public abstract class BaseParser
      */
     protected COSArray parseCOSArray() throws IOException
     {
-        long startPosition = source.getPosition();
-        readExpectedChar('[');
-        COSArray po = new COSArray();
-        COSBase pbo;
-        skipSpaces();
-        int i;
-        while (((i = source.peek()) > 0) && ((char) i != ']'))
+        try
         {
-            pbo = parseDirObject();
-            if( pbo instanceof COSObject )
-            {
-                // the current empty COSObject is replaced with the correct one
-                pbo = null;
-                // We have to check if the expected values are there or not 
PDFBOX-385
-                if (po.size() > 1 && po.get(po.size() - 1) instanceof 
COSInteger)
-                {
-                    COSInteger genNumber = (COSInteger)po.remove( po.size() -1 
);
-                    if (po.size() > 0 && po.get(po.size() - 1) instanceof 
COSInteger)
-                    {
-                        COSInteger number = (COSInteger)po.remove( po.size() 
-1 );
-                        if (number.longValue() >= 0 && genNumber.intValue() >= 
0)
-                        {
-                            COSObjectKey key = getObjectKey(number.longValue(),
-                                    genNumber.intValue());
-                            pbo = getObjectFromPool(key);
-                        }
-                        else
-                        {
-                            LOG.warn("Invalid value(s) for an object key " + 
number.longValue()
-                                    + " " + genNumber.intValue());
-                        }
-                    }
-                }
-            }
-            // something went wrong
-            if (pbo == null)
+            recursionDepth++;
+            if (recursionDepth > MAX_RECURSION_DEPTH)
             {
-                //it could be a bad object in the array which is just skipped
-                LOG.warn("Corrupt array element at offset " + 
source.getPosition()
-                        + ", start offset: " + startPosition);
-                String isThisTheEnd = readString();
-                // return immediately if a corrupt element is followed by 
another array
-                // to avoid a possible infinite recursion as most likely the 
whole array is corrupted
-                if (isThisTheEnd.isEmpty() && source.peek() == '[')
-                {
-                    return po;
-                }
-                
source.rewind(isThisTheEnd.getBytes(StandardCharsets.ISO_8859_1).length);
-                // This could also be an "endobj" or "endstream" which means 
we can assume that
-                // the array has ended.
-                if(ENDOBJ_STRING.equals(isThisTheEnd) || 
ENDSTREAM_STRING.equals(isThisTheEnd))
-                {
-                    return po;
-                }
+                throw new IOException(MAX_RECUSRION_MSG);
             }
-            else
-            {
-                po.add(pbo);
-            }
-            skipSpaces();
+               long startPosition = source.getPosition();
+               readExpectedChar('[');
+               COSArray po = new COSArray();
+               COSBase pbo;
+               skipSpaces();
+               int i;
+               while (((i = source.peek()) > 0) && ((char) i != ']'))
+               {
+                   pbo = parseDirObject();
+                   if( pbo instanceof COSObject )
+                   {
+                       // the current empty COSObject is replaced with the 
correct one
+                       pbo = null;
+                       // We have to check if the expected values are there or 
not PDFBOX-385
+                       if (po.size() > 1 && po.get(po.size() - 1) instanceof 
COSInteger)
+                       {
+                           COSInteger genNumber = (COSInteger)po.remove( 
po.size() -1 );
+                           if (po.size() > 0 && po.get(po.size() - 1) 
instanceof COSInteger)
+                           {
+                               COSInteger number = (COSInteger)po.remove( 
po.size() -1 );
+                               if (number.longValue() >= 0 && 
genNumber.intValue() >= 0)
+                               {
+                                   COSObjectKey key = 
getObjectKey(number.longValue(),
+                                           genNumber.intValue());
+                                   pbo = getObjectFromPool(key);
+                               }
+                               else
+                               {
+                                   LOG.warn("Invalid value(s) for an object 
key " + number.longValue()
+                                           + " " + genNumber.intValue());
+                               }
+                           }
+                       }
+                   }
+                   // something went wrong
+                   if (pbo == null)
+                   {
+                       //it could be a bad object in the array which is just 
skipped
+                       LOG.warn("Corrupt array element at offset " + 
source.getPosition()
+                               + ", start offset: " + startPosition);
+                       String isThisTheEnd = readString();
+                       // return immediately if a corrupt element is followed 
by another array
+                       // to avoid a possible infinite recursion as most 
likely the whole array is corrupted
+                       if (isThisTheEnd.isEmpty() && source.peek() == '[')
+                       {
+                           return po;
+                       }
+                       
source.rewind(isThisTheEnd.getBytes(StandardCharsets.ISO_8859_1).length);
+                       // This could also be an "endobj" or "endstream" which 
means we can assume that
+                       // the array has ended.
+                       if(ENDOBJ_STRING.equals(isThisTheEnd) || 
ENDSTREAM_STRING.equals(isThisTheEnd))
+                       {
+                           return po;
+                       }
+                   }
+                   else
+                   {
+                       po.add(pbo);
+                   }
+                   skipSpaces();
+               }
+               // read ']'
+               source.read();
+               skipSpaces();
+               return po;
+        }
+        finally
+        {
+            recursionDepth--;
         }
-        // read ']'
-        source.read();
-        skipSpaces();
-        return po;
     }
 
     /**
@@ -938,72 +968,84 @@ public abstract class BaseParser
      */
     protected COSBase parseDirObject() throws IOException
     {
-        skipSpaces();
-        char c = (char) source.peek();
-        switch(c)
+        try
         {
-        case '<':
-            // pull off first left bracket
-            source.read();
-            // check for second left bracket
-            c = (char) source.peek();
-            source.rewind(1);
-            return c == '<' ? parseCOSDictionary(true) : parseCOSString();
-        case '[':
-            // array
-            return parseCOSArray();
-        case '(':
-            return parseCOSString();
-        case '/':   
-            // name
-            return parseCOSName();
-        case 'n':   
-            // null
-            readExpectedString(NULL, false);
-            return COSNull.NULL;
-        case 't':
-            readExpectedString(TRUE, false);
-            return COSBoolean.TRUE;
-        case 'f':
-            readExpectedString(FALSE, false);
-            return COSBoolean.FALSE;
-        case 'R':
-            source.read();
-            return new COSObject(null);
-        case (char)-1:
-            return null;
-        default:
-            if( Character.isDigit(c) || c == '-' || c == '+' || c == '.')
-            {
-                return parseCOSNumber();
-            }
-            // This is not suppose to happen, but we will allow for it
-            // so we are more compatible with POS writers that don't
-            // follow the spec
-            long startOffset = source.getPosition();
-            String badString = readString();
-            if (badString.isEmpty())
+            recursionDepth++;
+            if (recursionDepth > MAX_RECURSION_DEPTH)
             {
-                int peek = source.peek();
-                // we can end up in an infinite loop otherwise
-                throw new IOException("Unknown dir object c='" + c + "' cInt=" 
+ (int) c + " peek='"
-                        + (char) peek + "' peekInt=" + peek + " at offset " + 
source.getPosition()
-                        + " (start offset: " + startOffset + ")");
+                throw new IOException(MAX_RECUSRION_MSG);
             }
-
-            // if it's an endstream/endobj, we want to put it back so the 
caller will see it
-            if (ENDOBJ_STRING.equals(badString) || 
ENDSTREAM_STRING.equals(badString))
-            {
-                
source.rewind(badString.getBytes(StandardCharsets.ISO_8859_1).length);
-            }
-            else
-            {
-                LOG.warn("Skipped unexpected dir object = '" + badString + "' 
at offset "
-                        + source.getPosition() + " (start offset: " + 
startOffset + ")");
-                return this instanceof PDFStreamParser ? null : COSNull.NULL;
-            }
-        }
-        return null;
+               skipSpaces();
+               char c = (char) source.peek();
+               switch(c)
+               {
+               case '<':
+                   // pull off first left bracket
+                   source.read();
+                   // check for second left bracket
+                   c = (char) source.peek();
+                   source.rewind(1);
+                   return c == '<' ? parseCOSDictionary(true) : 
parseCOSString();
+               case '[':
+                   // array
+                   return parseCOSArray();
+               case '(':
+                   return parseCOSString();
+               case '/':   
+                   // name
+                   return parseCOSName();
+               case 'n':   
+                   // null
+                   readExpectedString(NULL, false);
+                   return COSNull.NULL;
+               case 't':
+                   readExpectedString(TRUE, false);
+                   return COSBoolean.TRUE;
+               case 'f':
+                   readExpectedString(FALSE, false);
+                   return COSBoolean.FALSE;
+               case 'R':
+                   source.read();
+                   return new COSObject(null);
+               case (char)-1:
+                   return null;
+               default:
+                   if( Character.isDigit(c) || c == '-' || c == '+' || c == 
'.')
+                   {
+                       return parseCOSNumber();
+                   }
+                   // This is not suppose to happen, but we will allow for it
+                   // so we are more compatible with POS writers that don't
+                   // follow the spec
+                   long startOffset = source.getPosition();
+                   String badString = readString();
+                   if (badString.isEmpty())
+                   {
+                       int peek = source.peek();
+                       // we can end up in an infinite loop otherwise
+                       throw new IOException("Unknown dir object c='" + c + "' 
cInt=" + (int) c + " peek='"
+                               + (char) peek + "' peekInt=" + peek + " at 
offset " + source.getPosition()
+                               + " (start offset: " + startOffset + ")");
+                   }
+       
+                   // if it's an endstream/endobj, we want to put it back so 
the caller will see it
+                   if (ENDOBJ_STRING.equals(badString) || 
ENDSTREAM_STRING.equals(badString))
+                   {
+                       
source.rewind(badString.getBytes(StandardCharsets.ISO_8859_1).length);
+                   }
+                   else
+                   {
+                       LOG.warn("Skipped unexpected dir object = '" + 
badString + "' at offset "
+                               + source.getPosition() + " (start offset: " + 
startOffset + ")");
+                       return this instanceof PDFStreamParser ? null : 
COSNull.NULL;
+                   }
+               }
+               return null;
+           }
+           finally
+           {
+               recursionDepth--;
+           }
     }
 
     private COSNumber parseCOSNumber() throws IOException

Modified: 
pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java
==============================================================================
--- 
pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java
    Sun Oct  5 11:34:39 2025        (r1928953)
+++ 
pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdfparser/TestBaseParser.java
    Sun Oct  5 11:47:07 2025        (r1928954)
@@ -18,9 +18,12 @@
 package org.apache.pdfbox.pdfparser;
 
 import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
 
 import java.io.IOException;
+import java.io.InputStream;
 
+import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.cos.COSString;
 import org.apache.pdfbox.io.RandomAccessReadBuffer;
 import org.junit.jupiter.api.Test;
@@ -88,4 +91,23 @@ class TestBaseParser
         assertEquals(output, cosString.getString());
     }
 
+    @Test
+    void testBaseParserStackOverflow()
+    {
+        // PDFBOX-6041
+        try (InputStream is = 
TestPDFParser.class.getResourceAsStream("PDFBOX-6041-example.pdf"))
+        {
+            Loader.loadPDF(new RandomAccessReadBuffer(is)).close();
+        }
+        catch (IOException exception)
+        {
+            assertEquals("Missing root object specification in trailer.", 
exception.getMessage());
+        }
+        catch (Exception exception)
+        {
+            fail("Unexpected Exception");
+        }
+
+    }
+
 }

Added: 
pdfbox/branches/3.0/pdfbox/src/test/resources/org/apache/pdfbox/pdfparser/PDFBOX-6041-example.pdf
==============================================================================
Binary file. No diff available.

Reply via email to