Author: lehmi
Date: Tue Dec 16 07:16:49 2025
New Revision: 1930615

Log:
PDFBOX-6036: avoid overlapping object keys when importing pages from another pdf

Modified:
   pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSArray.java
   pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
   pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
   pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSArray.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSArray.java       
Tue Dec 16 04:20:02 2025        (r1930614)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSArray.java       
Tue Dec 16 07:16:49 2025        (r1930615)
@@ -772,18 +772,18 @@ public class COSArray extends COSBase im
 
     /**
      * Collects all indirect objects numbers within this COSArray and all 
included dictionaries. It is used to avoid
-     * mixed up object numbers when importing an existing page to another pdf.
+     * overlapping object numbers when importing an existing page to another 
pdf.
      * 
      * Expert use only. You might run into an endless recursion if choosing a 
wrong starting point.
      * 
      * @param indirectObjects a collection of already found indirect objects.
      * 
      */
-    public void getIndirectObjectKeys(Collection<COSObjectKey> indirectObjects)
+    protected Collection<COSObjectKey> resetObjectKeys(Collection<COSObjectKey> indirectObjects)
     {
         if (indirectObjects == null)
         {
-            return;
+            return indirectObjects;
         }
         COSObjectKey key = getKey();
         if (key != null)
@@ -791,9 +791,11 @@ public class COSArray extends COSBase im
             // avoid endless recursions
             if (indirectObjects.contains(key))
             {
-                return;
+                return indirectObjects;
             }
             indirectObjects.add(key);
+            // reset key
+            setKey(null);
         }
         for (COSBase cosBase : objects)
         {
@@ -808,18 +810,21 @@ public class COSArray extends COSBase im
                 {
                     continue;
                 }
-                // dereference object
-                cosBase = ((COSObject) cosBase).getObject();
+                // dereference object first
+                COSBase dereferencedObject = ((COSObject) cosBase).getObject();
+                // reset key
+                cosBase.setKey(null);
+                cosBase = dereferencedObject;
             }
             if (cosBase instanceof COSDictionary)
             {
-                // descend to included dictionary to collect all included 
indirect objects
-                ((COSDictionary) cosBase).getIndirectObjectKeys(indirectObjects);
+                // descend to included dictionary to reset all included 
indirect objects
+                ((COSDictionary) cosBase).resetObjectKeys(indirectObjects);
             }
             else if (cosBase instanceof COSArray)
             {
-                // descend to included array to collect all included indirect 
objects
-                ((COSArray) cosBase).getIndirectObjectKeys(indirectObjects);
+                // descend to included array to reset all included indirect 
objects
+                ((COSArray) cosBase).resetObjectKeys(indirectObjects);
             }
             else if (indirectObjectKey != null)
             {
@@ -827,6 +832,7 @@ public class COSArray extends COSBase im
                 indirectObjects.add(indirectObjectKey);
             }
         }
+        return indirectObjects;
     }
 
     // wrap indirect objects

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java  
Tue Dec 16 04:20:02 2025        (r1930614)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java  
Tue Dec 16 07:16:49 2025        (r1930615)
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Calendar;
 import java.util.Collection;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -1425,19 +1426,27 @@ public class COSDictionary extends COSBa
     }
 
     /**
+     * Reset all object keys to avoid overlapping numbers when saving the new 
pdf.
+     */
+    public void resetImportedObjectKeys()
+    {
+        resetObjectKeys(new HashSet<>()).clear();
+    }
+
+    /**
      * Collects all indirect objects numbers within this dictionary and all 
included dictionaries. It is used to avoid
-     * mixed up object numbers when importing an existing page to another pdf.
+     * overlapping object numbers when importing an existing page to another 
pdf.
      * 
      * Expert use only. You might run into an endless recursion if choosing a 
wrong starting point.
      * 
      * @param indirectObjects a collection of already found indirect objects.
      * 
      */
-    public void getIndirectObjectKeys(Collection<COSObjectKey> indirectObjects)
+    protected Collection<COSObjectKey> resetObjectKeys(Collection<COSObjectKey> indirectObjects)
     {
         if (indirectObjects == null)
         {
-            return;
+            return indirectObjects;
         }
         COSObjectKey key = getKey();
         if (key != null)
@@ -1445,9 +1454,11 @@ public class COSDictionary extends COSBa
             // avoid endless recursions
             if (indirectObjects.contains(key))
             {
-                return;
+                return indirectObjects;
             }
             indirectObjects.add(key);
+            // reset object key
+            setKey(null);
         }
         for (Entry<COSName, COSBase> entry : items.entrySet())
         {
@@ -1460,23 +1471,25 @@ public class COSDictionary extends COSBa
                 {
                     continue;
                 }
-                // dereference object
+                // dereference object first
                 cosBase = ((COSObject) cosBase).getObject();
+                // reset object key
+                entry.getValue().setKey(null);
             }
             if (cosBase instanceof COSDictionary)
             {
                 COSName entryKey = entry.getKey();
-                // descend to included dictionary to collect all included 
indirect objects
+                // descend to included dictionary to reset all included 
indirect objects
                 // skip PARENT and P references to avoid recursions
                 if (!COSName.PARENT.equals(entryKey) && !COSName.P.equals(entryKey))
                 {
-                    ((COSDictionary) cosBase).getIndirectObjectKeys(indirectObjects);
+                    ((COSDictionary) cosBase).resetObjectKeys(indirectObjects);
                 }
             }
             else if (cosBase instanceof COSArray)
             {
-                // descend to included array to collect all included indirect 
objects
-                ((COSArray) cosBase).getIndirectObjectKeys(indirectObjects);
+                // descend to included array to reset all included indirect 
objects
+                ((COSArray) cosBase).resetObjectKeys(indirectObjects);
             }
             else if (indirectObjectKey != null)
             {
@@ -1484,6 +1497,7 @@ public class COSDictionary extends COSBa
                 indirectObjects.add(indirectObjectKey);
             }
         }
+        return indirectObjects;
     }
 
 }

Modified: 
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java 
Tue Dec 16 04:20:02 2025        (r1930614)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java 
Tue Dec 16 07:16:49 2025        (r1930615)
@@ -28,7 +28,6 @@ import java.io.IOException;
 import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collection;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
@@ -45,7 +44,6 @@ import org.apache.pdfbox.cos.COSDocument
 import org.apache.pdfbox.cos.COSInteger;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSObject;
-import org.apache.pdfbox.cos.COSObjectKey;
 import org.apache.pdfbox.cos.COSUpdateInfo;
 import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.io.RandomAccessRead;
@@ -152,9 +150,6 @@ public class PDDocument implements Close
     // to make sure only one signature is added
     private boolean signatureAdded = false;
 
-    // cache for the key of all imported indirect objects
-    private final Collection<COSObjectKey> indirectObjectKeys = new HashSet<>();
-
     /**
      * Creates an empty PDF document.
      * You need to add at least one page for the document to be valid.
@@ -240,7 +235,6 @@ public class PDDocument implements Close
     public void addPage(PDPage page)
     {
         getPages().add(page);
-        setHighestImportedObjectNumber(page);
     }
 
     /**
@@ -703,6 +697,8 @@ public class PDDocument implements Close
         importedPage.getCOSObject().removeItem(COSName.PARENT);
         PDStream dest = new PDStream(this, page.getContents(), COSName.FLATE_DECODE);
         importedPage.setContents(dest);
+        // reset imported object keys to avoid overlapping object numbers
+        importedPage.getCOSObject().resetImportedObjectKeys();
         addPage(importedPage);
         importedPage.setCropBox(new PDRectangle(page.getCropBox().getCOSArray()));
         importedPage.setMediaBox(new PDRectangle(page.getMediaBox().getCOSArray()));
@@ -716,21 +712,6 @@ public class PDDocument implements Close
     }
 
     /**
-     * Determine the highest object number from the imported page to avoid 
mixed up numbers when saving the new pdf.
-     * 
-     * @param importedPage the imported page.
-     */
-    private void setHighestImportedObjectNumber(PDPage importedPage)
-    {
-        importedPage.getCOSObject().getIndirectObjectKeys(indirectObjectKeys);
-        long highestImportedNumber = indirectObjectKeys.stream().map(COSObjectKey::getNumber)
-                .max(Long::compare).orElse(0L);
-        long highestXRefObjectNumber = getDocument().getHighestXRefObjectNumber();
-        getDocument().setHighestXRefObjectNumber(
-                Math.max(highestXRefObjectNumber, highestImportedNumber));
-    }
-
-    /**
      * This will get the low level document.
      * 
      * @return The document that this layer sits on top of.

Modified: 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java
==============================================================================
--- 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java
    Tue Dec 16 04:20:02 2025        (r1930614)
+++ 
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java
    Tue Dec 16 07:16:49 2025        (r1930615)
@@ -23,6 +23,8 @@ import java.io.BufferedOutputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.nio.file.Paths;
 
 import org.apache.pdfbox.Loader;
@@ -148,4 +150,30 @@ class COSWriterTest
         }
     }
 
+    @Test
+    void testPDFBox5752() throws IOException, URISyntaxException
+    {
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        byte[] emptyPDF = new URI(
+                "https://issues.apache.org/jira/secure/attachment/13066015/empty.pdf").toURL()
+                        .openStream().readAllBytes();
+        byte[] roboPDF = new URI(
+                "https://issues.apache.org/jira/secure/attachment/13066016/roboto-14.pdf").toURL()
+                        .openStream().readAllBytes();
+        try (PDDocument targetDoc = Loader.loadPDF(emptyPDF);
+                PDDocument doc2 = Loader.loadPDF(roboPDF))
+        {
+            PDPage sourcePage = doc2.getPage(0);
+            targetDoc.importPage(sourcePage);
+            targetDoc.save(baos);
+        }
+        try (PDDocument targetDoc = Loader.loadPDF(baos.toByteArray()))
+        {
+            assertNotNull(targetDoc.getDocumentCatalog().getStructureTreeRoot());
+            PDResources res = targetDoc.getPage(1).getResources();
+            assertEquals("BCDEEE+Roboto-Regular", res.getFont(COSName.getPDFName("F1")).getName());
+            assertEquals("BCDFEE+Roboto-Regular", res.getFont(COSName.getPDFName("F2")).getName());
+        }
+    }
+
 }

Reply via email to