Author: lehmi
Date: Sat Nov 22 10:08:53 2025
New Revision: 1929915

Log:
PDFBOX-6036: replace recursive algorithm with an iterative one to avoid a 
StackOverflowError

Added:
   
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterCompressionPoolTest.java
   (contents, props changed)
Modified:
   
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterCompressionPool.java

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterCompressionPool.java
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterCompressionPool.java
        Sat Nov 22 10:05:39 2025        (r1929914)
+++ 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdfwriter/compress/COSWriterCompressionPool.java
        Sat Nov 22 10:08:53 2025        (r1929915)
@@ -18,9 +18,9 @@ package org.apache.pdfbox.pdfwriter.comp
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.HashSet;
-import java.util.Iterator;
 import java.util.List;
 import java.util.Set;
 
@@ -29,7 +29,6 @@ import org.apache.pdfbox.pdmodel.PDDocum
 import org.apache.pdfbox.cos.COSArray;
 import org.apache.pdfbox.cos.COSBase;
 import org.apache.pdfbox.cos.COSDictionary;
-import org.apache.pdfbox.cos.COSDocument;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.cos.COSObjectKey;
@@ -79,11 +78,23 @@ public class COSWriterCompressionPool
         objectPool = new 
COSObjectPool(document.getDocument().getHighestXRefObjectNumber());
 
         // Initialize object pool.
-        COSDocument cosDocument = document.getDocument();
-
-        COSDictionary trailer = cosDocument.getTrailer();
-        addStructure(trailer.getItem(COSName.ROOT));
-        addStructure(trailer.getItem(COSName.INFO));
+        COSDictionary trailer = document.getDocument().getTrailer();
+        List<COSBase> cosBaseList = new ArrayList<>();
+        COSDictionary root = trailer.getCOSDictionary(COSName.ROOT);
+        if (root != null)
+        {
+            cosBaseList.add(root);
+        }
+        COSDictionary info = trailer.getCOSDictionary(COSName.INFO);
+        if (info != null)
+        {
+            cosBaseList.add(info);
+        }
+        while (!cosBaseList.isEmpty())
+        {
+            cosBaseList = addStructure(cosBaseList);
+        }
+        allDirectObjects.clear();
 
         Collections.sort(objectStreamObjects);
         Collections.sort(topLevelObjects);
@@ -159,10 +170,27 @@ public class COSWriterCompressionPool
     /**
      * Attempts to find yet unregistered streams and dictionaries in the given 
structure.
      *
+     * @param cosBaseList A list of objects to be added for compressing.
+     * @throws IOException Shall be thrown, if adding failed.
+     */
+    private List<COSBase> addStructure(List<COSBase> cosBaseList) throws 
IOException
+    {
+        List<COSBase> cosBaseListNext = new ArrayList<>();
+        for (COSBase cosBase : cosBaseList)
+        {
+            cosBaseListNext.addAll(addStructure(cosBase));
+        }
+        cosBaseList.clear();
+        return cosBaseListNext;
+    }
+
+    /**
+     * Attempts to find yet unregistered streams and dictionaries in the given 
structure.
+     *
      * @param current The object to be added for compressing.
      * @throws IOException Shall be thrown, if compressing the object failed.
      */
-    private void addStructure(COSBase current) throws IOException
+    private List<COSBase> addStructure(COSBase current) throws IOException
     {
         COSBase base = current;
         if (current instanceof COSStream
@@ -182,45 +210,63 @@ public class COSWriterCompressionPool
         }
         if (base instanceof COSArray)
         {
-            addElements(((COSArray) base).iterator());
+            return getElements(((COSArray) base).toList());
         }
         else if (base instanceof COSDictionary)
         {
-            addElements(((COSDictionary) base).getValues().iterator());
+            return getElements(((COSDictionary) base).getValues());
         }
+        return Collections.emptyList();
     }
 
-    private void addElements(Iterator<COSBase> elements) throws IOException
+    /**
+     * Collect all relevant objects from a COSDictionary/COSArray.
+     * 
+     * @param elements collection of all elements of a COSDictionary/COSArray.
+     * 
+     * @return a collection containing the relevant objects within the given 
Collection.
+     * @throws IOException if something went wrong.
+     */
+    private List<COSBase> getElements(Collection<? extends COSBase> elements) 
throws IOException
     {
-        while (elements.hasNext())
+        List<COSBase> relevantElements = new ArrayList<>();
+        for (COSBase element : elements)
         {
-            COSBase value = elements.next();
-            if (value instanceof COSArray
-                    || (value instanceof COSDictionary
-                    && !allDirectObjects.contains(value)))
+            if (filterElement(element))
             {
-                allDirectObjects.add(value);
-                addStructure(value);
+                relevantElements.add(element);
             }
-            else if (value instanceof COSObject)
+        }
+        return relevantElements;
+    }
+
+    private boolean filterElement(COSBase element) throws IOException
+    {
+        if (element instanceof COSObject)
+        {
+            COSObject cosObject = (COSObject) element;
+            if (cosObject.getKey() != null && 
objectPool.contains(cosObject.getKey()))
             {
-                COSObject cosObject = (COSObject) value;
-                if (cosObject.getKey() != null && 
objectPool.contains(cosObject.getKey()))
-                {
-                    // check if the stored object matches the referenced 
object otherwise replace the key with a new one
-                    // there may differences if some imported content uses the 
same object numbers than the target pdf
-                    if 
(objectPool.getObject(cosObject.getKey()).equals(cosObject.getObject()))
-                    {
-                        continue;
-                    }
-                    cosObject.setKey(null);
-                }
-                if (cosObject.getObject() != null)
+                // check if the stored object matches the referenced object, 
otherwise replace the key with a new one
+                // there may be differences if some imported content uses the 
same object numbers as the target pdf
+                if 
(objectPool.getObject(cosObject.getKey()).equals(cosObject.getObject()))
                 {
-                    addStructure(value);
+                    return false;
                 }
+                cosObject.setKey(null);
             }
+            if (cosObject.getObject() != null)
+            {
+                return true;
+            }
+        }
+        else if (element instanceof COSArray
+                || (element instanceof COSDictionary && 
!allDirectObjects.contains(element)))
+        {
+            allDirectObjects.add(element);
+            return true;
         }
+        return false;
     }
 
     /**

Added: 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterCompressionPoolTest.java
==============================================================================
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterCompressionPoolTest.java
     Sat Nov 22 10:08:53 2025        (r1929915)
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pdfbox.pdfwriter;
+
+import java.io.IOException;
+
+import org.apache.pdfbox.pdfwriter.compress.COSWriterCompressionPool;
+import org.apache.pdfbox.pdfwriter.compress.CompressParameters;
+import org.apache.pdfbox.pdmodel.PDDocument;
+import 
org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDDocumentOutline;
+import 
org.apache.pdfbox.pdmodel.interactive.documentnavigation.outline.PDOutlineItem;
+import org.junit.jupiter.api.Test;
+
+class COSWriterCompressionPoolTest
+{
+    /**
+     * The old implementation may run into a stack overflow whenever the 
recursion depth gets too deep to be processed
+     * when collecting the objects to be compressed.
+     * 
+     * The new solution replaces the recursion with an iteration.
+     * 
+     * @throws IOException if creating the compression pool or closing the document fails
+     */
+    @Test
+    void testPDFBox6036() throws IOException
+    {
+        for (int i = 1; i <= 222_222; i *= 2)
+        {
+            try (PDDocument document = new PDDocument())
+            {
+                PDDocumentOutline outline = new PDDocumentOutline();
+                document.getDocumentCatalog().setDocumentOutline(outline);
+                for (int j = 0; j < i; j++)
+                {
+                    outline.addLast(new PDOutlineItem());
+                }
+                new COSWriterCompressionPool(document, 
CompressParameters.DEFAULT_COMPRESSION);
+            }
+        }
+    }
+
+}

Reply via email to