Author: tilman
Date: Sat May 17 10:32:29 2025
New Revision: 1925618
URL: http://svn.apache.org/viewvc?rev=1925618&view=rev
Log:
PDFBOX-6009: remove structure elements without /Pg entry if there is at least
one MCID
Modified:
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Splitter.java
Modified:
pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Splitter.java
URL:
http://svn.apache.org/viewvc/pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Splitter.java?rev=1925618&r1=1925617&r2=1925618&view=diff
==============================================================================
--- pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Splitter.java (original)
+++ pdfbox/branches/2.0/pdfbox/src/main/java/org/apache/pdfbox/multipdf/Splitter.java Sat May 17 10:32:29 2025
@@ -30,6 +30,7 @@ import org.apache.commons.logging.LogFac
import org.apache.pdfbox.cos.COSArray;
import org.apache.pdfbox.cos.COSBase;
import org.apache.pdfbox.cos.COSDictionary;
+import org.apache.pdfbox.cos.COSInteger;
import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.cos.COSObject;
import org.apache.pdfbox.io.MemoryUsageSetting;
@@ -413,23 +414,28 @@ public class Splitter
{
return dstDict;
}
+ COSDictionary srcPageDict = srcDict.getCOSDictionary(COSName.PG);
COSDictionary dstPageDict = null;
- if (srcDict.containsKey(COSName.PG))
+ COSBase kid = srcDict.getDictionaryObject(COSName.K);
+ if (srcPageDict != null)
{
- COSDictionary srcPageDict = srcDict.getCOSDictionary(COSName.PG);
- if (srcPageDict == null)
- {
- return null;
- }
dstPageDict = pageDictMap.get(srcPageDict);
- if (dstPageDict == null)
+ if (dstPageDict != null)
{
- return null;
+ PDPage dstPage = new PDPage(dstPageDict);
+ if (dstPageTree.indexOf(dstPage) == -1)
+ {
+ return null;
+ }
}
- PDPage dstPage = new PDPage(dstPageDict);
- if (dstPageTree.indexOf(dstPage) == -1)
+ else
{
- return null;
+ // PDFBOX-6009: quit if MCIDs because these need a /Pg entry
+ if (hasMCIDs(kid))
+ {
+ return null;
+ }
+ // else keep this as an intermediate element for now
}
}
@@ -471,7 +477,6 @@ public class Splitter
}
dstDict.setItem(COSName.PG, dstPageDict);
- COSBase kid = srcDict.getDictionaryObject(COSName.K);
// stack overflow here with 207658.pdf, too complex
COSBase cloneKid = createClone(kid, dstDict, dstPageDict != null ?
dstPageDict : currentPageDict);
@@ -502,6 +507,26 @@ public class Splitter
return dstDict;
}
+ private boolean hasMCIDs(COSBase kid)
+ {
+ if (kid instanceof COSInteger)
+ {
+ return true;
+ }
+ if (kid instanceof COSArray)
+ {
+ COSArray ar = (COSArray) kid;
+ for (int i = 0; i < ar.size(); ++i)
+ {
+ if (ar.getObject(i) instanceof COSInteger)
+ {
+ return true;
+ }
+ }
+ }
+ return false;
+ }
+
private void removePossibleOrphanAnnotation(COSDictionary srcObj, COSDictionary srcDict,
COSDictionary currentPageDict, COSDictionary dstDict)
{