Author: lehmi
Date: Tue Dec 16 07:20:09 2025
New Revision: 1930616
Log:
PDFBOX-6036: avoid overlapping object keys when importing pages from another pdf
Modified:
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSArray.java
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java
Modified:
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSArray.java
==============================================================================
---
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSArray.java
Tue Dec 16 07:16:49 2025 (r1930615)
+++
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSArray.java
Tue Dec 16 07:20:09 2025 (r1930616)
@@ -753,7 +753,7 @@ public class COSArray extends COSBase im
*
* @param indirectObjects a list of already found indirect objects.
*
- * @deprecated, use {@link #getIndirectObjectKeys(Collection)} instead
+ * @deprecated will be removed in 4.0
*/
public void getIndirectObjectKeys(List<COSObjectKey> indirectObjects)
{
@@ -768,6 +768,7 @@ public class COSArray extends COSBase im
*
* @param indirectObjects a collection of already found indirect objects.
*
+ * @deprecated will be removed in 4.0
*/
public void getIndirectObjectKeys(Collection<COSObjectKey> indirectObjects)
{
@@ -819,6 +820,72 @@ public class COSArray extends COSBase im
}
}
+    /**
+     * Resets the object key of this array and of all indirect objects referenced by it, descending into
+     * all included dictionaries and arrays.
+     * It is used to avoid overlapping object numbers when importing an existing page to another pdf.
+     *
+     * Expert use only. This might trigger some unwanted side effects.
+     *
+     * @param indirectObjects a collection of already found indirect objects. The keys found while traversing
+     * are added to it. If it is null, nothing is reset.
+     *
+     * @return the updated collection of indirect objects, i.e. the given instance
+     */
+    protected Collection<COSObjectKey> resetObjectKeys(Collection<COSObjectKey> indirectObjects)
+    {
+        if (indirectObjects == null)
+        {
+            return indirectObjects;
+        }
+        COSObjectKey key = getKey();
+        if (key != null)
+        {
+            // avoid endless recursions
+            if (indirectObjects.contains(key))
+            {
+                return indirectObjects;
+            }
+            // remember the key before it is removed from this array
+            indirectObjects.add(key);
+            // reset key
+            setKey(null);
+        }
+        for (COSBase cosBase : objects)
+        {
+            if (cosBase == null)
+            {
+                continue;
+            }
+            // only references (COSObject) are treated as indirect objects here
+            COSObjectKey indirectObjectKey = cosBase instanceof COSObject ? cosBase.getKey() : null;
+            if (indirectObjectKey != null)
+            {
+                // skip references whose key was already collected
+                if (indirectObjects.contains(indirectObjectKey))
+                {
+                    continue;
+                }
+                // dereference object first
+                COSBase dereferencedObject = ((COSObject) cosBase).getObject();
+                // reset key of the reference itself
+                cosBase.setKey(null);
+                // continue with the referenced object
+                cosBase = dereferencedObject;
+            }
+            if (cosBase instanceof COSDictionary)
+            {
+                // descend to included dictionary to reset all included indirect objects
+                ((COSDictionary) cosBase).resetObjectKeys(indirectObjects);
+            }
+            else if (cosBase instanceof COSArray)
+            {
+                // descend to included array to reset all included indirect objects
+                ((COSArray) cosBase).resetObjectKeys(indirectObjects);
+            }
+            else if (indirectObjectKey != null)
+            {
+                // add key for all indirect objects other than COSDictionary/COSArray
+                indirectObjects.add(indirectObjectKey);
+            }
+        }
+        return indirectObjects;
+    }
+
// wrap indirect objects
private COSBase maybeWrap(COSBase object)
{
Modified:
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
==============================================================================
---
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
Tue Dec 16 07:16:49 2025 (r1930615)
+++
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/cos/COSDictionary.java
Tue Dec 16 07:20:09 2025 (r1930616)
@@ -22,6 +22,7 @@ import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collection;
+import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
@@ -1503,4 +1504,84 @@ public class COSDictionary extends COSBa
}
}
+    /**
+     * Resets the object keys of this dictionary and of all indirect objects reachable from it, starting with
+     * an empty collection of already found keys. It is meant to be called after importing a page to avoid
+     * overlapping numbers when saving the new pdf.
+     *
+     * Expert use only. This might trigger some unwanted side effects.
+     *
+     */
+    public void resetImportedObjectKeys()
+    {
+        Collection<COSObjectKey> collectedKeys = resetObjectKeys(new HashSet<>());
+        // the collected keys are not used any further
+        collectedKeys.clear();
+    }
+
+    /**
+     * Resets the object key of this dictionary and of all indirect objects referenced by it, descending into
+     * all included dictionaries and arrays. It is used to avoid overlapping object numbers when importing an
+     * existing page to another pdf.
+     *
+     * Dictionaries stored as PARENT or P entry are not descended into, but the key of the reference pointing
+     * to them is reset nevertheless.
+     *
+     * Expert use only. This might trigger some unwanted side effects.
+     *
+     * @param indirectObjects a collection of already found indirect objects. The keys found while traversing
+     * are added to it. If it is null, nothing is reset.
+     *
+     * @return the updated collection of indirect objects, i.e. the given instance
+     */
+    protected Collection<COSObjectKey> resetObjectKeys(Collection<COSObjectKey> indirectObjects)
+    {
+        if (indirectObjects == null)
+        {
+            return indirectObjects;
+        }
+        COSObjectKey key = getKey();
+        if (key != null)
+        {
+            // avoid endless recursions
+            if (indirectObjects.contains(key))
+            {
+                return indirectObjects;
+            }
+            // remember the key before it is removed from this dictionary
+            indirectObjects.add(key);
+            // reset object key
+            setKey(null);
+        }
+        for (Entry<COSName, COSBase> entry : items.entrySet())
+        {
+            COSBase cosBase = entry.getValue();
+            // only references (COSObject) are treated as indirect objects here
+            COSObjectKey indirectObjectKey = cosBase instanceof COSObject ? cosBase.getKey() : null;
+            if (indirectObjectKey != null)
+            {
+                // avoid endless recursions
+                if (indirectObjects.contains(indirectObjectKey))
+                {
+                    continue;
+                }
+                // dereference object first
+                cosBase = ((COSObject) cosBase).getObject();
+                // reset object key of the reference itself (the entry value is still the COSObject)
+                entry.getValue().setKey(null);
+            }
+            if (cosBase instanceof COSDictionary)
+            {
+                COSName entryKey = entry.getKey();
+                // descend to included dictionary to reset all included indirect objects
+                // skip PARENT and P references to avoid recursions
+                if (!COSName.PARENT.equals(entryKey) && !COSName.P.equals(entryKey))
+                {
+                    ((COSDictionary) cosBase).resetObjectKeys(indirectObjects);
+                }
+            }
+            else if (cosBase instanceof COSArray)
+            {
+                // descend to included array to reset all included indirect objects
+                ((COSArray) cosBase).resetObjectKeys(indirectObjects);
+            }
+            else if (indirectObjectKey != null)
+            {
+                // add key for all indirect objects other than COSDictionary/COSArray
+                indirectObjects.add(indirectObjectKey);
+            }
+        }
+        return indirectObjects;
+    }
+
}
Modified:
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
==============================================================================
---
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
Tue Dec 16 07:16:49 2025 (r1930615)
+++
pdfbox/branches/3.0/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDDocument.java
Tue Dec 16 07:20:09 2025 (r1930616)
@@ -28,7 +28,6 @@ import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
@@ -45,7 +44,6 @@ import org.apache.pdfbox.cos.COSDocument
import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
-import org.apache.pdfbox.cos.COSObjectKey;
import org.apache.pdfbox.cos.COSUpdateInfo;
import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.io.RandomAccessRead;
@@ -152,9 +150,6 @@ public class PDDocument implements Close
// to make sure only one signature is added
private boolean signatureAdded = false;
- // cache for the key of all imported indirect objects
- private final Collection<COSObjectKey> indirectObjectKeys = new
HashSet<>();
-
/**
* Creates an empty PDF document.
* You need to add at least one page for the document to be valid.
@@ -240,7 +235,6 @@ public class PDDocument implements Close
public void addPage(PDPage page)
{
getPages().add(page);
- setHighestImportedObjectNumber(page);
}
/**
@@ -700,6 +694,8 @@ public class PDDocument implements Close
importedPage.getCOSObject().removeItem(COSName.PARENT);
PDStream dest = new PDStream(this, page.getContents(),
COSName.FLATE_DECODE);
importedPage.setContents(dest);
+ // reset imported object keys to avoid overlapping object numbers
+ importedPage.getCOSObject().resetImportedObjectKeys();
addPage(importedPage);
importedPage.setCropBox(new
PDRectangle(page.getCropBox().getCOSArray()));
importedPage.setMediaBox(new
PDRectangle(page.getMediaBox().getCOSArray()));
@@ -713,21 +709,6 @@ public class PDDocument implements Close
}
/**
- * Determine the highest object number from the imported page to avoid
mixed up numbers when saving the new pdf.
- *
- * @param importedPage the imported page.
- */
- private void setHighestImportedObjectNumber(PDPage importedPage)
- {
- importedPage.getCOSObject().getIndirectObjectKeys(indirectObjectKeys);
- long highestImportedNumber =
indirectObjectKeys.stream().map(COSObjectKey::getNumber)
- .max(Long::compare).orElse(0L);
- long highestXRefObjectNumber =
getDocument().getHighestXRefObjectNumber();
- getDocument().setHighestXRefObjectNumber(
- Math.max(highestXRefObjectNumber, highestImportedNumber));
- }
-
- /**
* This will get the low level document.
*
* @return The document that this layer sits on top of.
Modified:
pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java
==============================================================================
---
pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java
Tue Dec 16 07:16:49 2025 (r1930615)
+++
pdfbox/branches/3.0/pdfbox/src/test/java/org/apache/pdfbox/pdfwriter/COSWriterTest.java
Tue Dec 16 07:20:09 2025 (r1930616)
@@ -23,12 +23,15 @@ import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
+import java.net.URI;
+import java.net.URISyntaxException;
import java.nio.file.Paths;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.cos.COSDocument;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObjectKey;
+import org.apache.pdfbox.io.IOUtils;
import org.apache.pdfbox.multipdf.PageExtractor;
import org.apache.pdfbox.pdfwriter.compress.CompressParameters;
import org.apache.pdfbox.pdmodel.PDDocument;
@@ -148,4 +151,30 @@ class COSWriterTest
}
}
+    /**
+     * Imports the first page of one pdf into another one, saves the result and checks that the structure
+     * tree root and both fonts of the imported page can still be resolved after reloading.
+     *
+     * The two source files are downloaded from issues.apache.org, so the test needs network access.
+     *
+     * @throws IOException if downloading, loading or saving a pdf fails
+     * @throws URISyntaxException if one of the attachment addresses is malformed
+     */
+    @Test
+    void testPDFBox5752() throws IOException, URISyntaxException
+    {
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        byte[] emptyPDF;
+        // IOUtils.toByteArray only reads the stream, so it has to be closed here
+        try (java.io.InputStream emptyIn = new URI(
+                "https://issues.apache.org/jira/secure/attachment/13066015/empty.pdf").toURL().openStream())
+        {
+            emptyPDF = IOUtils.toByteArray(emptyIn);
+        }
+        byte[] roboPDF;
+        try (java.io.InputStream roboIn = new URI(
+                "https://issues.apache.org/jira/secure/attachment/13066016/roboto-14.pdf").toURL().openStream())
+        {
+            roboPDF = IOUtils.toByteArray(roboIn);
+        }
+        try (PDDocument targetDoc = Loader.loadPDF(emptyPDF);
+                PDDocument doc2 = Loader.loadPDF(roboPDF))
+        {
+            PDPage sourcePage = doc2.getPage(0);
+            targetDoc.importPage(sourcePage);
+            targetDoc.save(baos);
+        }
+        // reload the saved result; the imported page is the second page of the target document
+        try (PDDocument targetDoc = Loader.loadPDF(baos.toByteArray()))
+        {
+            assertNotNull(targetDoc.getDocumentCatalog().getStructureTreeRoot());
+            PDResources res = targetDoc.getPage(1).getResources();
+            assertEquals("BCDEEE+Roboto-Regular", res.getFont(COSName.getPDFName("F1")).getName());
+            assertEquals("BCDFEE+Roboto-Regular", res.getFont(COSName.getPDFName("F2")).getName());
+        }
+    }
+
}