Author: lehmi
Date: Tue Dec 16 07:16:49 2025
New Revision: 1930615
Log:
PDFBOX-6036: avoid overlapping object keys when importing pages from another pdf
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSArray.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSArray.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSArray.java Tue Dec 16 04:20:02 2025 (r1930614)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSArray.java Tue Dec 16 07:16:49 2025 (r1930615)
@@ -772,18 +772,18 @@ public class COSArray extends COSBase im
/**
* Collects all indirect objects numbers within this COSArray and all included dictionaries. It is used to avoid
- * mixed up object numbers when importing an existing page to another pdf.
+ * overlapping object numbers when importing an existing page to another pdf.
*
* Expert use only. You might run into an endless recursion if choosing a wrong starting point.
*
* @param indirectObjects a collection of already found indirect objects.
*
*/
- public void getIndirectObjectKeys(Collection<COSObjectKey> indirectObjects)
+ protected Collection<COSObjectKey> resetObjectKeys(Collection<COSObjectKey> indirectObjects)
{
if (indirectObjects == null)
{
- return;
+ return indirectObjects;
}
COSObjectKey key = getKey();
if (key != null)
@@ -791,9 +791,11 @@ public class COSArray extends COSBase im
// avoid endless recursions
if (indirectObjects.contains(key))
{
- return;
+ return indirectObjects;
}
indirectObjects.add(key);
+ // reset key
+ setKey(null);
}
for (COSBase cosBase : objects)
{
@@ -808,18 +810,21 @@ public class COSArray extends COSBase im
{
continue;
}
- // dereference object
- cosBase = ((COSObject) cosBase).getObject();
+ // dereference object first
+ COSBase dereferencedObject = ((COSObject) cosBase).getObject();
+ // reset key
+ cosBase.setKey(null);
+ cosBase = dereferencedObject;
}
if (cosBase instanceof COSDictionary)
{
- // descend to included dictionary to collect all included indirect objects
- ((COSDictionary) cosBase).getIndirectObjectKeys(indirectObjects);
+ // descend to included dictionary to reset all included indirect objects
+ ((COSDictionary) cosBase).resetObjectKeys(indirectObjects);
}
else if (cosBase instanceof COSArray)
{
- // descend to included array to collect all included indirect objects
- ((COSArray) cosBase).getIndirectObjectKeys(indirectObjects);
+ // descend to included array to reset all included indirect objects
+ ((COSArray) cosBase).resetObjectKeys(indirectObjects);
}
else if (indirectObjectKey != null)
{
@@ -827,6 +832,7 @@ public class COSArray extends COSBase im
indirectObjects.add(indirectObjectKey);
}
}
+ return indirectObjects;
}
// wrap indirect objects
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java Tue Dec 16 04:20:02 2025 (r1930614)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java Tue Dec 16 07:16:49 2025 (r1930615)
@@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collection;
+import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -1425,19 +1426,27 @@ public class COSDictionary extends COSBa
}
/**
+ * Reset all object keys to avoid overlapping numbers when saving the new pdf.
+ */
+ public void resetImportedObjectKeys()
+ {
+ resetObjectKeys(new HashSet<>()).clear();
+ }
+
+ /**
* Collects all indirect objects numbers within this dictionary and all included dictionaries. It is used to avoid
- * mixed up object numbers when importing an existing page to another pdf.
+ * overlapping object numbers when importing an existing page to another pdf.
*
* Expert use only. You might run into an endless recursion if choosing a wrong starting point.
*
* @param indirectObjects a collection of already found indirect objects.
*
*/
- public void getIndirectObjectKeys(Collection<COSObjectKey> indirectObjects)
+ protected Collection<COSObjectKey> resetObjectKeys(Collection<COSObjectKey> indirectObjects)
{
if (indirectObjects == null)
{
- return;
+ return indirectObjects;
}
COSObjectKey key = getKey();
if (key != null)
@@ -1445,9 +1454,11 @@ public class COSDictionary extends COSBa
// avoid endless recursions
if (indirectObjects.contains(key))
{
- return;
+ return indirectObjects;
}
indirectObjects.add(key);
+ // reset object key
+ setKey(null);
}
for (Entry<COSName, COSBase> entry : items.entrySet())
{
@@ -1460,23 +1471,25 @@ public class COSDictionary extends COSBa
{
continue;
}
- // dereference object
+ // dereference object first
cosBase = ((COSObject) cosBase).getObject();
+ // reset object key
+ entry.getValue().setKey(null);
}
if (cosBase instanceof COSDictionary)
{
COSName entryKey = entry.getKey();
- // descend to included dictionary to collect all included indirect objects
+ // descend to included dictionary to reset all included indirect objects
// skip PARENT and P references to avoid recursions
if (!COSName.PARENT.equals(entryKey) &&
!COSName.P.equals(entryKey))
{
- ((COSDictionary) cosBase).getIndirectObjectKeys(indirectObjects);
+ ((COSDictionary) cosBase).resetObjectKeys(indirectObjects);
}
}
else if (cosBase instanceof COSArray)
{
- // descend to included array to collect all included indirect objects
- ((COSArray) cosBase).getIndirectObjectKeys(indirectObjects);
+ // descend to included array to reset all included indirect objects
+ ((COSArray) cosBase).resetObjectKeys(indirectObjects);
}
else if (indirectObjectKey != null)
{
@@ -1484,6 +1497,7 @@ public class COSDictionary extends COSBa
indirectObjects.add(indirectObjectKey);
}
}
+ return indirectObjects;
}
}
Modified:
pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java Tue Dec 16 04:20:02 2025 (r1930614)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java Tue Dec 16 07:16:49 2025 (r1930615)
@@ -28,7 +28,6 @@ import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@@ -45,7 +44,6 @@ import org.apache.pdfbox.cos.COSDocument
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
-import org.apache.pdfbox.cos.COSObjectKey;
import org.apache.pdfbox.cos.COSUpdateInfo;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.RandomAccessRead;
@@ -152,9 +150,6 @@ public class PDDocument implements Close
// to make sure only one signature is added
private boolean signatureAdded = false;
- // cache for the key of all imported indirect objects
- private final Collection<COSObjectKey> indirectObjectKeys = new HashSet<>();
-
/**
* Creates an empty PDF document.
* You need to add at least one page for the document to be valid.
@@ -240,7 +235,6 @@ public class PDDocument implements Close
public void addPage(PDPage page)
{
getPages().add(page);
- setHighestImportedObjectNumber(page);
}
/**
@@ -703,6 +697,8 @@ public class PDDocument implements Close
importedPage.getCOSObject().removeItem(COSName.PARENT);
PDStream dest = new PDStream(this, page.getContents(),
COSName.FLATE_DECODE);
importedPage.setContents(dest);
+ // reset imported object keys to avoid overlapping object numbers
+ importedPage.getCOSObject().resetImportedObjectKeys();
addPage(importedPage);
importedPage.setCropBox(new
PDRectangle(page.getCropBox().getCOSArray()));
importedPage.setMediaBox(new
PDRectangle(page.getMediaBox().getCOSArray()));
@@ -716,21 +712,6 @@ public class PDDocument implements Close
}
/**
- * Determine the highest object number from the imported page to avoid mixed up numbers when saving the new pdf.
- *
- * @param importedPage the imported page.
- */
- private void setHighestImportedObjectNumber(PDPage importedPage)
- {
- importedPage.getCOSObject().getIndirectObjectKeys(indirectObjectKeys);
- long highestImportedNumber = indirectObjectKeys.stream().map(COSObjectKey::getNumber)
- .max(Long::compare).orElse(0L);
- long highestXRefObjectNumber = getDocument().getHighestXRefObjectNumber();
- getDocument().setHighestXRefObjectNumber(
- Math.max(highestXRefObjectNumber, highestImportedNumber));
- }
-
- /**
* This will get the low level document.
*
* @return The document that this layer sits on top of.
Modified:
pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java Tue Dec 16 04:20:02 2025 (r1930614)
+++ pdfbox/trunk/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java Tue Dec 16 07:16:49 2025 (r1930615)
@@ -23,6 +23,8 @@ import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.nio.file.Paths;
import org.apache.pdfbox.Loader;
@@ -148,4 +150,30 @@ class COSWriterTest
}
}
+ @Test
+ void testPDFBox5752() throws IOException, URISyntaxException
+ {
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ byte[] emptyPDF = new URI(
+ "https://issues.apache.org/jira/secure/attachment/13066015/empty.pdf").toURL()
+ .openStream().readAllBytes();
+ byte[] roboPDF = new URI(
+ "https://issues.apache.org/jira/secure/attachment/13066016/roboto-14.pdf").toURL()
+ .openStream().readAllBytes();
+ try (PDDocument targetDoc = Loader.loadPDF(emptyPDF);
+ PDDocument doc2 = Loader.loadPDF(roboPDF))
+ {
+ PDPage sourcePage = doc2.getPage(0);
+ targetDoc.importPage(sourcePage);
+ targetDoc.save(baos);
+ }
+ try (PDDocument targetDoc = Loader.loadPDF(baos.toByteArray()))
+ {
+ assertNotNull(targetDoc.getDocumentCatalog().getStructureTreeRoot());
+ PDResources res = targetDoc.getPage(1).getResources();
+ assertEquals("BCDEEE+Roboto-Regular", res.getFont(COSName.getPDFName("F1")).getName());
+ assertEquals("BCDFEE+Roboto-Regular", res.getFont(COSName.getPDFName("F2")).getName());
+ }
+ }
+
}