Author: lehmi
Date: Tue Dec 16 07:20:09 2025
New Revision: 1930616

Log:
PDFBOX-6036: avoid overlapping object keys when importing pages from another pdf

Modified:
   pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSArray.java
   pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
   pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
   pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java

Modified: pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSArray.java
==============================================================================
--- pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSArray.java    Tue Dec 16 07:16:49 2025        (r1930615)
+++ pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSArray.java    Tue Dec 16 07:20:09 2025        (r1930616)
@@ -753,7 +753,7 @@ public class COSArray extends COSBase im
      * 
      * @param indirectObjects a list of already found indirect objects.
      * 
-     * @deprecated, use {@link #getIndirectObjectKeys(Collection)} instead
+     * @deprecated, will be removed in 4.0
      */
     public void getIndirectObjectKeys(List<COSObjectKey> indirectObjects)
     {
@@ -768,6 +768,7 @@ public class COSArray extends COSBase im
      * 
      * @param indirectObjects a collection of already found indirect objects.
      * 
+     * @deprecated, will be removed in 4.0
      */
     public void getIndirectObjectKeys(Collection<COSObjectKey> indirectObjects)
     {
@@ -819,6 +820,72 @@ public class COSArray extends COSBase im
         }
     }
 
+    /**
+     * Resets the object key of all indirect objects numbers within this array and all included dictionaries and arrays.
+     * It is used to avoid overlapping object numbers when importing an existing page to another pdf.
+     * 
+     * Expert use only. This might trigger some unwanted side effects.
+     * 
+     * @param indirectObjects a collection of already found indirect objects.
+     * 
+     * @return the updated collection of indirect objects
+     */
+    protected Collection<COSObjectKey> resetObjectKeys(Collection<COSObjectKey> indirectObjects)
+    {
+        if (indirectObjects == null)
+        {
+            return indirectObjects;
+        }
+        COSObjectKey key = getKey();
+        if (key != null)
+        {
+            // avoid endless recursions
+            if (indirectObjects.contains(key))
+            {
+                return indirectObjects;
+            }
+            indirectObjects.add(key);
+            // reset key
+            setKey(null);
+        }
+        for (COSBase cosBase : objects)
+        {
+            if (cosBase == null)
+            {
+                continue;
+            }
+            COSObjectKey indirectObjectKey = cosBase instanceof COSObject ? cosBase.getKey() : null;
+            if (indirectObjectKey != null)
+            {
+                if (indirectObjects.contains(indirectObjectKey))
+                {
+                    continue;
+                }
+                // dereference object first
+                COSBase dereferencedObject = ((COSObject) cosBase).getObject();
+                // reset key
+                cosBase.setKey(null);
+                cosBase = dereferencedObject;
+            }
+            if (cosBase instanceof COSDictionary)
+            {
+                // descend to included dictionary to reset all included indirect objects
+                ((COSDictionary) cosBase).resetObjectKeys(indirectObjects);
+            }
+            else if (cosBase instanceof COSArray)
+            {
+                // descend to included array to reset all included indirect objects
+                ((COSArray) cosBase).resetObjectKeys(indirectObjects);
+            }
+            else if (indirectObjectKey != null)
+            {
+                // add key for all indirect objects other than COSDictionary/COSArray
+                indirectObjects.add(indirectObjectKey);
+            }
+        }
+        return indirectObjects;
+    }
+
     // wrap indirect objects
     private COSBase maybeWrap(COSBase object)
     {

Modified: pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
==============================================================================
--- pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java   Tue Dec 16 07:16:49 2025        (r1930615)
+++ pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java   Tue Dec 16 07:20:09 2025        (r1930616)
@@ -22,6 +22,7 @@ import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Calendar;
 import java.util.Collection;
+import java.util.HashSet;
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.Map;
@@ -1503,4 +1504,84 @@ public class COSDictionary extends COSBa
         }
     }
 
+    /**
+     * Reset all object keys of all indirect objects after importing a page to avoid overlapping numbers when saving the
+     * new pdf.
+     * 
+     * Expert use only. This might trigger some unwanted side effects.
+     * 
+     */
+    public void resetImportedObjectKeys()
+    {
+        resetObjectKeys(new HashSet<>()).clear();
+    }
+
+    /**
+     * Resets the object key of all indirect objects numbers within this dictionary and all included dictionaries and
+     * arrays. It is used to avoid overlapping object numbers when importing an existing page to another pdf.
+     * 
+     * Expert use only. This might trigger some unwanted side effects.
+     * 
+     * @param indirectObjects a collection of already found indirect objects.
+     * 
+     * @return the updated collection of indirect objects
+     */
+    protected Collection<COSObjectKey> resetObjectKeys(Collection<COSObjectKey> indirectObjects)
+    {
+        if (indirectObjects == null)
+        {
+            return indirectObjects;
+        }
+        COSObjectKey key = getKey();
+        if (key != null)
+        {
+            // avoid endless recursions
+            if (indirectObjects.contains(key))
+            {
+                return indirectObjects;
+            }
+            indirectObjects.add(key);
+            // reset object key
+            setKey(null);
+        }
+        for (Entry<COSName, COSBase> entry : items.entrySet())
+        {
+            COSBase cosBase = entry.getValue();
+            COSObjectKey indirectObjectKey = cosBase instanceof COSObject ? cosBase.getKey() : null;
+            if (indirectObjectKey != null)
+            {
+                // avoid endless recursions
+                if (indirectObjects.contains(indirectObjectKey))
+                {
+                    continue;
+                }
+                // dereference object first
+                cosBase = ((COSObject) cosBase).getObject();
+                // reset object key
+                entry.getValue().setKey(null);
+            }
+            if (cosBase instanceof COSDictionary)
+            {
+                COSName entryKey = entry.getKey();
+                // descend to included dictionary to reset all included indirect objects
+                // skip PARENT and P references to avoid recursions
+                if (!COSName.PARENT.equals(entryKey) && !COSName.P.equals(entryKey))
+                {
+                    ((COSDictionary) cosBase).resetObjectKeys(indirectObjects);
+                }
+            }
+            else if (cosBase instanceof COSArray)
+            {
+                // descend to included array to reset all included indirect objects
+                ((COSArray) cosBase).resetObjectKeys(indirectObjects);
+            }
+            else if (indirectObjectKey != null)
+            {
+                // add key for all indirect objects other than COSDictionary/COSArray
+                indirectObjects.add(indirectObjectKey);
+            }
+        }
+        return indirectObjects;
+    }
+
 }

Modified: pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
==============================================================================
--- pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java  Tue Dec 16 07:16:49 2025        (r1930615)
+++ pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java  Tue Dec 16 07:20:09 2025        (r1930616)
@@ -28,7 +28,6 @@ import java.io.IOException;
 import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
-import java.util.Collection;
 import java.util.HashSet;
 import java.util.Iterator;
 import java.util.List;
@@ -45,7 +44,6 @@ import org.apache.pdfbox.cos.COSDocument
 import org.apache.pdfbox.cos.COSInteger;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSObject;
-import org.apache.pdfbox.cos.COSObjectKey;
 import org.apache.pdfbox.cos.COSUpdateInfo;
 import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.io.RandomAccessRead;
@@ -152,9 +150,6 @@ public class PDDocument implements Close
     // to make sure only one signature is added
     private boolean signatureAdded = false;
 
-    // cache for the key of all imported indirect objects
-    private final Collection<COSObjectKey> indirectObjectKeys = new HashSet<>();
-
     /**
      * Creates an empty PDF document.
      * You need to add at least one page for the document to be valid.
@@ -240,7 +235,6 @@ public class PDDocument implements Close
     public void addPage(PDPage page)
     {
         getPages().add(page);
-        setHighestImportedObjectNumber(page);
     }
 
     /**
@@ -700,6 +694,8 @@ public class PDDocument implements Close
         importedPage.getCOSObject().removeItem(COSName.PARENT);
         PDStream dest = new PDStream(this, page.getContents(), COSName.FLATE_DECODE);
         importedPage.setContents(dest);
+        // reset imported object keys to avoid overlapping object numbers
+        importedPage.getCOSObject().resetImportedObjectKeys();
         addPage(importedPage);
         importedPage.setCropBox(new PDRectangle(page.getCropBox().getCOSArray()));
         importedPage.setMediaBox(new PDRectangle(page.getMediaBox().getCOSArray()));
@@ -713,21 +709,6 @@ public class PDDocument implements Close
     }
 
     /**
-     * Determine the highest object number from the imported page to avoid mixed up numbers when saving the new pdf.
-     * 
-     * @param importedPage the imported page.
-     */
-    private void setHighestImportedObjectNumber(PDPage importedPage)
-    {
-        importedPage.getCOSObject().getIndirectObjectKeys(indirectObjectKeys);
-        long highestImportedNumber = indirectObjectKeys.stream().map(COSObjectKey::getNumber)
-                .max(Long::compare).orElse(0L);
-        long highestXRefObjectNumber = getDocument().getHighestXRefObjectNumber();
-        getDocument().setHighestXRefObjectNumber(
-                Math.max(highestXRefObjectNumber, highestImportedNumber));
-    }
-
-    /**
      * This will get the low level document.
      * 
      * @return The document that this layer sits on top of.

Modified: pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java
==============================================================================
--- pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java     Tue Dec 16 07:16:49 2025        (r1930615)
+++ pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java     Tue Dec 16 07:20:09 2025        (r1930616)
@@ -23,12 +23,15 @@ import java.io.BufferedOutputStream;
 import java.io.ByteArrayOutputStream;
 import java.io.File;
 import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
 import java.nio.file.Paths;
 
 import org.apache.pdfbox.Loader;
 import org.apache.pdfbox.cos.COSDocument;
 import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSObjectKey;
+import org.apache.pdfbox.io.IOUtils;
 import org.apache.pdfbox.multipdf.PageExtractor;
 import org.apache.pdfbox.pdfwriter.compress.CompressParameters;
 import org.apache.pdfbox.pdmodel.PDDocument;
@@ -148,4 +151,30 @@ class COSWriterTest
         }
     }
 
+    @Test
+    void testPDFBox5752() throws IOException, URISyntaxException
+    {
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        byte[] emptyPDF = IOUtils.toByteArray(
+                new URI("https://issues.apache.org/jira/secure/attachment/13066015/empty.pdf")
+                        .toURL().openStream());
+        byte[] roboPDF = IOUtils.toByteArray(
+                new URI("https://issues.apache.org/jira/secure/attachment/13066016/roboto-14.pdf")
+                        .toURL().openStream());
+        try (PDDocument targetDoc = Loader.loadPDF(emptyPDF);
+                PDDocument doc2 = Loader.loadPDF(roboPDF))
+        {
+            PDPage sourcePage = doc2.getPage(0);
+            targetDoc.importPage(sourcePage);
+            targetDoc.save(baos);
+        }
+        try (PDDocument targetDoc = Loader.loadPDF(baos.toByteArray()))
+        {
+            assertNotNull(targetDoc.getDocumentCatalog().getStructureTreeRoot());
+            PDResources res = targetDoc.getPage(1).getResources();
+            assertEquals("BCDEEE+Roboto-Regular", res.getFont(COSName.getPDFName("F1")).getName());
+            assertEquals("BCDFEE+Roboto-Regular", res.getFont(COSName.getPDFName("F2")).getName());
+        }
+    }
+
 }

Reply via email to