[
https://issues.apache.org/jira/browse/PDFBOX-6194?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18077897#comment-18077897
]
Tilman Hausherr commented on PDFBOX-6194:
-----------------------------------------
New standalone code:
{code:java}
package com.mycompany.mavenpdfboxtest;
import java.awt.image.BufferedImage;
import java.awt.image.DataBufferInt;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
import org.apache.pdfbox.rendering.PDFRenderer;
public class PDFBOX6194_2
{
public static final PDType1Font font = new
PDType1Font(Standard14Fonts.FontName.COURIER);
public static void main(String[] args) throws IOException
{
doStuff(new File("XXXX/000012.pdf"));
doStuff(new File("XXXX/000016.pdf"));
}
private static void doStuff(File file) throws IOException
{
BufferedImage bim1a;
BufferedImage bim2a;
BufferedImage bim3a;
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try (PDDocument doc = Loader.loadPDF(file))
{
PDFRenderer r = new PDFRenderer(doc);
bim1a = r.renderImage(0);
bim2a = r.renderImage(1);
bim3a = r.renderImage(2);
for (int p = 0; p < 3; ++p)
{
try (PDPageContentStream cs = new PDPageContentStream(doc,
doc.getPage(p), AppendMode.APPEND, true, true))
{
cs.setFont(font, 50);
cs.beginText();
cs.showText(" ");
cs.endText();
}
}
doc.setAllSecurityToBeRemoved(true);
doc.save(baos);
}
BufferedImage bim1b;
BufferedImage bim2b;
BufferedImage bim3b;
try (PDDocument doc = Loader.loadPDF(baos.toByteArray()))
{
PDFRenderer r = new PDFRenderer(doc);
bim1b = r.renderImage(0);
bim2b = r.renderImage(1);
bim3b = r.renderImage(2);
boolean b1 = Arrays.equals(((DataBufferInt)
bim1a.getRaster().getDataBuffer()).getData(), ((DataBufferInt)
bim1b.getRaster().getDataBuffer()).getData());
boolean b2 = Arrays.equals(((DataBufferInt)
bim2a.getRaster().getDataBuffer()).getData(), ((DataBufferInt)
bim2b.getRaster().getDataBuffer()).getData());
boolean b3 = Arrays.equals(((DataBufferInt)
bim3a.getRaster().getDataBuffer()).getData(), ((DataBufferInt)
bim3b.getRaster().getDataBuffer()).getData());
if (!b1 || !b2 || !b3)
{
System.err.println("bad rendering: " + file.getName() + "
different");
try (OutputStream os =
Files.newOutputStream(Paths.get("XXXXX/bad-" + file.getName())))
{
os.write(baos.toByteArray());
}
}
}
catch (Throwable t)
{
t.printStackTrace();
System.err.println("bad exception: " + file.getName() + ": " +
t.getClass().getSimpleName());
try (OutputStream os = Files.newOutputStream(Paths.get("XXXX/bad-"
+ file.getName())))
{
os.write(baos.toByteArray());
}
}
}
}
{code}
output:
03.05.2026 13:48:33 ERROR cos.COSObject:122 - Can't dereference COSObject{56 0 R}
java.io.IOException: Length object content was not read.
at org.apache.pdfbox.pdfparser.COSParser.getLength(COSParser.java:872)
at
org.apache.pdfbox.pdfparser.COSParser.parseCOSStream(COSParser.java:914)
at
org.apache.pdfbox.pdfparser.COSParser.parseFileObject(COSParser.java:750)
at
org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:671)
at
org.apache.pdfbox.pdfparser.COSParser.dereferenceCOSObject(COSParser.java:625)
at org.apache.pdfbox.cos.COSObject.getObject(COSObject.java:117)
at
org.apache.pdfbox.pdmodel.PDPage.lambda$getContentsForRandomAccess$0(PDPage.java:304)
at
java.base/java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:195)
at
java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1655)
at
java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:484)
at
java.base/java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:474)
at
java.base/java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:913)
at
java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
at
java.base/java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:578)
at
org.apache.pdfbox.pdmodel.PDPage.getContentsForRandomAccess(PDPage.java:307)
at
org.apache.pdfbox.pdmodel.PDPage.getContentsForStreamParsing(PDPage.java:281)
at
org.apache.pdfbox.pdfparser.PDFStreamParser.<init>(PDFStreamParser.java:66)
at
org.apache.pdfbox.contentstream.PDFStreamEngine.processStreamOperators(PDFStreamEngine.java:541)
at
org.apache.pdfbox.contentstream.PDFStreamEngine.processStream(PDFStreamEngine.java:522)
at
org.apache.pdfbox.contentstream.PDFStreamEngine.processPage(PDFStreamEngine.java:158)
at org.apache.pdfbox.rendering.PageDrawer.drawPage(PageDrawer.java:289)
at
org.apache.pdfbox.rendering.PDFRenderer.renderImage(PDFRenderer.java:348)
at
org.apache.pdfbox.rendering.PDFRenderer.renderImage(PDFRenderer.java:262)
at
org.apache.pdfbox.rendering.PDFRenderer.renderImage(PDFRenderer.java:213)
at
org.apache.pdfbox.rendering.PDFRenderer.renderImage(PDFRenderer.java:197)
at
com.mycompany.mavenpdfboxtest.PDFBOX6194_2.doStuff(PDFBOX6194_2.java:62)
at com.mycompany.mavenpdfboxtest.PDFBOX6194_2.main(PDFBOX6194_2.java:27)
bad rendering: 000016.pdf different
> COSStream becomes COSDictionary after save — shared XObject reference
> replaced by Font
> --------------------------------------------------------------------------------------
>
> Key: PDFBOX-6194
> URL: https://issues.apache.org/jira/browse/PDFBOX-6194
> Project: PDFBox
> Issue Type: Bug
> Components: PDModel
> Affects Versions: 3.0.7 PDFBox
> Environment: Windows Server 2016, Java 21, PDFBox 3.0.7
> Reporter: HABA
> Priority: Major
> Attachments: 000012.pdf, 000016.pdf, 000025.pdf, bad-000025.pdf,
> image-2026-04-20-12-33-11-057.png, image-2026-04-20-13-52-20-247.png,
> image-2026-04-20-13-52-44-302.png, image-2026-05-01-19-07-19-330.png,
> screenshot-1.png
>
>
> Hi,
> `document.save()` corrupts an `/XObject` on page 3 of a 3-page PDF.
> Before save:
> - `Obj5` = `COSStream` (ImageMask)
> After save:
> - `Obj5` = `COSDictionary` (Courier font)
> Pages 1–2 are unaffected. All pages share the same indirect XObject refs
> (`Obj4`, `Obj5`).
> Flow:
> - load PDF
> - render pages via `PDFRenderer.renderImageWithDPI()`
> - append invisible OCR text using `PDPageContentStream` (AppendMode.APPEND,
> Courier)
> - save document → corruption occurs
> Result:
> java.io.IOException: Unexpected object type: COSDictionary
>
> Reproduced consistently on:
> * Windows Server 2016, Java 21, PDFBox 3.0.7
> Not reproducible on:
> * Windows 11, Java 21 (same code + input)
> Likely related to shared indirect XObject being overwritten during save.
> Cannot share original PDF (confidential), but can test with synthetic
> reproducer if needed.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]