[ 
https://issues.apache.org/jira/browse/PDFBOX-6194?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18077897#comment-18077897
 ] 

Tilman Hausherr commented on PDFBOX-6194:
-----------------------------------------

New standalone reproducer code:
{code:java}
package com.mycompany.mavenpdfboxtest;

import java.awt.image.BufferedImage;
import java.awt.image.DataBufferInt;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.PDPageContentStream.AppendMode;
import org.apache.pdfbox.pdmodel.font.PDType1Font;
import org.apache.pdfbox.pdmodel.font.Standard14Fonts;
import org.apache.pdfbox.rendering.PDFRenderer;

public class PDFBOX6194_2
{
    public static final PDType1Font font = new 
PDType1Font(Standard14Fonts.FontName.COURIER);

    public static void main(String[] args) throws IOException
    {
        doStuff(new File("XXXX/000012.pdf"));
        doStuff(new File("XXXX/000016.pdf"));
    }

    private static void doStuff(File file) throws IOException
    {
        BufferedImage bim1a;
        BufferedImage bim2a;
        BufferedImage bim3a;
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        try (PDDocument doc = Loader.loadPDF(file))
        {
            PDFRenderer r = new PDFRenderer(doc);
            bim1a = r.renderImage(0);
            bim2a = r.renderImage(1);
            bim3a = r.renderImage(2);

            for (int p = 0; p < 3; ++p)
            {
                try (PDPageContentStream cs = new PDPageContentStream(doc, 
doc.getPage(p), AppendMode.APPEND, true, true))
                {
                    cs.setFont(font, 50);
                    cs.beginText();
                    cs.showText(" ");
                    cs.endText();
                }
            }
            doc.setAllSecurityToBeRemoved(true);
            doc.save(baos);
        }
        BufferedImage bim1b;
        BufferedImage bim2b;
        BufferedImage bim3b;
        try (PDDocument doc = Loader.loadPDF(baos.toByteArray()))
        {
            PDFRenderer r = new PDFRenderer(doc);
            bim1b = r.renderImage(0);
            bim2b = r.renderImage(1);
            bim3b = r.renderImage(2);
            boolean b1 = Arrays.equals(((DataBufferInt) 
bim1a.getRaster().getDataBuffer()).getData(), ((DataBufferInt) 
bim1b.getRaster().getDataBuffer()).getData());
            boolean b2 = Arrays.equals(((DataBufferInt) 
bim2a.getRaster().getDataBuffer()).getData(), ((DataBufferInt) 
bim2b.getRaster().getDataBuffer()).getData());
            boolean b3 = Arrays.equals(((DataBufferInt) 
bim3a.getRaster().getDataBuffer()).getData(), ((DataBufferInt) 
bim3b.getRaster().getDataBuffer()).getData());
            if (!b1 || !b2 || !b3)
            {
                System.err.println("bad rendering: " + file.getName() + " 
different");
                try (OutputStream os = 
Files.newOutputStream(Paths.get("XXXXX/bad-" + file.getName())))
                {
                    os.write(baos.toByteArray());
                }
            }
        }
        catch (Throwable t)
        {
            t.printStackTrace();
            System.err.println("bad exception: " + file.getName() + ": " + 
t.getClass().getSimpleName());
            try (OutputStream os = Files.newOutputStream(Paths.get("XXXX/bad-" 
+ file.getName())))
            {
                os.write(baos.toByteArray());
            }
        }
    }
}
{code}
output:

03.05.2026 13:48:33 ERROR cos.COSObject:122 - Can't dereference COSObject{56 0 R}
java.io.IOException: Length object content was not read.
        at org.apache.pdfbox.pdfparser.COSParser.getLength(COSParser.java:872)
        at 
org.apache.pdfbox.pdfparser.COSParser.parseCOSStream(COSParser.java:914)
        at 
org.apache.pdfbox.pdfparser.COSParser.parseFileObject(COSParser.java:750)
        at 
org.apache.pdfbox.pdfparser.COSParser.parseObjectDynamically(COSParser.java:671)
        at 
org.apache.pdfbox.pdfparser.COSParser.dereferenceCOSObject(COSParser.java:625)
        at org.apache.pdfbox.cos.COSObject.getObject(COSObject.java:117)
        at 
org.apache.pdfbox.pdmodel.PDPage.lambda$getContentsForRandomAccess$0(PDPage.java:304)
        at 
java.base/java.util.stream.ReferencePipeline$3$1.accept(ReferencePipeline.java:195)
        at 
java.base/java.util.ArrayList$ArrayListSpliterator.forEachRemaining(ArrayList.java:1655)
        at 
java.base/java.util.stream.AbstractPipeline.copyInto(AbstractPipeline.java:484)
        at 
java.base/java.util.stream.AbstractPipeline.wrapAndCopyInto(AbstractPipeline.java:474)
        at 
java.base/java.util.stream.ReduceOps$ReduceOp.evaluateSequential(ReduceOps.java:913)
        at 
java.base/java.util.stream.AbstractPipeline.evaluate(AbstractPipeline.java:234)
        at 
java.base/java.util.stream.ReferencePipeline.collect(ReferencePipeline.java:578)
        at 
org.apache.pdfbox.pdmodel.PDPage.getContentsForRandomAccess(PDPage.java:307)
        at 
org.apache.pdfbox.pdmodel.PDPage.getContentsForStreamParsing(PDPage.java:281)
        at 
org.apache.pdfbox.pdfparser.PDFStreamParser.<init>(PDFStreamParser.java:66)
        at 
org.apache.pdfbox.contentstream.PDFStreamEngine.processStreamOperators(PDFStreamEngine.java:541)
        at 
org.apache.pdfbox.contentstream.PDFStreamEngine.processStream(PDFStreamEngine.java:522)
        at 
org.apache.pdfbox.contentstream.PDFStreamEngine.processPage(PDFStreamEngine.java:158)
        at org.apache.pdfbox.rendering.PageDrawer.drawPage(PageDrawer.java:289)
        at 
org.apache.pdfbox.rendering.PDFRenderer.renderImage(PDFRenderer.java:348)
        at 
org.apache.pdfbox.rendering.PDFRenderer.renderImage(PDFRenderer.java:262)
        at 
org.apache.pdfbox.rendering.PDFRenderer.renderImage(PDFRenderer.java:213)
        at 
org.apache.pdfbox.rendering.PDFRenderer.renderImage(PDFRenderer.java:197)
        at 
com.mycompany.mavenpdfboxtest.PDFBOX6194_2.doStuff(PDFBOX6194_2.java:62)
        at com.mycompany.mavenpdfboxtest.PDFBOX6194_2.main(PDFBOX6194_2.java:27)
bad rendering: 000016.pdf different


> COSStream becomes COSDictionary after save — shared XObject reference 
> replaced by Font
> --------------------------------------------------------------------------------------
>
>                 Key: PDFBOX-6194
>                 URL: https://issues.apache.org/jira/browse/PDFBOX-6194
>             Project: PDFBox
>          Issue Type: Bug
>          Components: PDModel
>    Affects Versions: 3.0.7 PDFBox
>         Environment: Windows Server 2016, Java 21, PDFBox 3.0.7
>            Reporter: HABA
>            Priority: Major
>         Attachments: 000012.pdf, 000016.pdf, 000025.pdf, bad-000025.pdf, 
> image-2026-04-20-12-33-11-057.png, image-2026-04-20-13-52-20-247.png, 
> image-2026-04-20-13-52-44-302.png, image-2026-05-01-19-07-19-330.png, 
> screenshot-1.png
>
>
> Hi,
> `document.save()` corrupts an `/XObject` on page 3 of a 3-page PDF.
> Before save:
> - `Obj5` = `COSStream` (ImageMask)
> After save:
> - `Obj5` = `COSDictionary` (Courier font)
> Pages 1–2 are unaffected. All pages share the same indirect XObject refs 
> (`Obj4`, `Obj5`).
> Flow:
> - load PDF
> - render pages via `PDFRenderer.renderImageWithDPI()`
> - append invisible OCR text using `PDPageContentStream` (AppendMode.APPEND, 
> Courier)
> - save document → corruption occurs
> Result:
> java.io.IOException: Unexpected object type: COSDictionary
>  
> Reproduced consistently on:
>  * Windows Server 2016, Java 21, PDFBox 3.0.7
> Not reproducible on:
>  * Windows 11, Java 21 (same code + input)
> Likely related to shared indirect XObject being overwritten during save.
> Cannot share original PDF (confidential), but can test with synthetic 
> reproducer if needed.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to