[ 
https://issues.apache.org/jira/browse/PDFBOX-6194?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18074853#comment-18074853
 ] 

Tilman Hausherr edited comment on PDFBOX-6194 at 4/20/26 6:58 PM:
------------------------------------------------------------------

Good file:  [^000025.pdf]  bad result file:  [^bad-000025.pdf] (one content 
stream is null)

code used:
{code:java}
public class PDFBOX6194
{
    public static final PDType1Font font = new 
PDType1Font(Standard14Fonts.FontName.COURIER);

    public static void main(String[] args) throws IOException
    {
        String dirName = "XXXXXXXXXXXX";
        File dir = new File(dirName);
        File files[] = dir.listFiles((File __, String name) -> 
name.endsWith(".pdf"));
        for (File file : files)
        {
            System.out.println("processing file: " + file.getName());
            BufferedImage bim1a;
            BufferedImage bim2a;
            BufferedImage bim3a;
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            try (PDDocument doc = Loader.loadPDF(file))
            {
                PDFRenderer r = new PDFRenderer(doc);
                if (doc.getNumberOfPages() < 3)
                {
                    continue;
                }
                try
                {
                    bim1a = r.renderImage(0);
                    bim2a = r.renderImage(1);
                    bim3a = r.renderImage(2);
                }
                catch (Throwable t)
                {
                    continue;
                }

                for (int p = 0; p < 3; ++p)
                {
                    try (PDPageContentStream cs = new PDPageContentStream(doc, 
doc.getPage(p), AppendMode.APPEND, true, true))
                    {
                        cs.setFont(font, 50);
                        cs.beginText();
                        cs.showText(" ");
                        cs.endText();
                    }
                }
                doc.setAllSecurityToBeRemoved(true);
                doc.save(baos);
            }
            catch (Throwable t)
            {
                continue;
            }
            BufferedImage bim1b;
            BufferedImage bim2b;
            BufferedImage bim3b;
            try (PDDocument doc = Loader.loadPDF(baos.toByteArray()))
            {
                PDFRenderer r = new PDFRenderer(doc);
                bim1b = r.renderImage(0);
                bim2b = r.renderImage(1);
                bim3b = r.renderImage(2);
                boolean b1 = Arrays.equals(((DataBufferInt) 
bim1a.getRaster().getDataBuffer()).getData(), ((DataBufferInt) 
bim1b.getRaster().getDataBuffer()).getData());
                boolean b2 = Arrays.equals(((DataBufferInt) 
bim2a.getRaster().getDataBuffer()).getData(), ((DataBufferInt) 
bim2b.getRaster().getDataBuffer()).getData());
                boolean b3 = Arrays.equals(((DataBufferInt) 
bim3a.getRaster().getDataBuffer()).getData(), ((DataBufferInt) 
bim3b.getRaster().getDataBuffer()).getData());
                if (!b1 || !b2 || !b3)
                {
                    System.err.println("bad rendering: " + file.getName() + " 
different");
                    try (OutputStream os = 
Files.newOutputStream(Paths.get("XXXXXXX/bad-" + file.getName())))
                    {
                        os.write(baos.toByteArray());
                    }
                }
            }
            catch (Throwable t)
            {
                t.printStackTrace();
                System.err.println("bad exception: " + file.getName() + ": " + 
t.getClass().getSimpleName());
                try (OutputStream os = 
Files.newOutputStream(Paths.get("XXXXXXXXXX/bad-" + file.getName())))
                {
                    os.write(baos.toByteArray());
                }
            }
        }
    }
}
{code}


was (Author: tilman):
Good file:  [^000025.pdf]  bad result file:  [^bad-000025.pdf] (one content 
stream is null)

code used:
{code:java}
/**
 * Test program for PDFBOX-6194.
 *
 * For every file in a directory whose name ends with .pdf it renders pages 0 to 2,
 * appends a text operation to each of these pages, saves the document to memory,
 * reloads the saved bytes, renders the same pages again and compares the pixel data
 * of both renderings with compareDatabuffer. Saved documents that render differently,
 * or that throw while being reloaded or rendered, are written to a file with a
 * bad- name prefix.
 */
public class PDFBOX6194
{
    /** Font used for the appended text; this one instance is reused for every file processed. */
    public static final PDType1Font font = new 
PDType1Font(Standard14Fonts.FontName.COURIER);

    /**
     * Runs the comparison over all PDF files of the configured directory.
     *
     * @param args not used
     * @throws IOException if writing a bad result file in the final catch block fails
     */
    public static void main(String[] args) throws IOException
    {
        String dirName = "XXXXXXXXXXXX";
        File dir = new File(dirName);
        // NOTE(review): File.listFiles returns null when the path is not a directory
        // or cannot be read; the loop below would then throw a NullPointerException.
        File files[] = dir.listFiles((File __, String name) -> 
name.endsWith(".pdf"));
        for (File file : files)
        {
            System.out.println("processing file: " + file.getName());
            // renderings of pages 0 to 2 of the unmodified document
            BufferedImage bim1a;
            BufferedImage bim2a;
            BufferedImage bim3a;
            // receives the saved, modified document
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            try (PDDocument doc = Loader.loadPDF(file))
            {
                PDFRenderer r = new PDFRenderer(doc);
                // documents with fewer than three pages are skipped
                if (doc.getNumberOfPages() < 3)
                {
                    continue;
                }
                try
                {
                    bim1a = r.renderImage(0);
                    bim2a = r.renderImage(1);
                    bim3a = r.renderImage(2);
                }
                catch (Throwable t)
                {
                    // a file that cannot be rendered before modification is skipped silently
                    continue;
                }

                // append a text block showing a single space to each of the three pages
                for (int p = 0; p < 3; ++p)
                {
                    try (PDPageContentStream cs = new PDPageContentStream(doc, 
doc.getPage(p), AppendMode.APPEND, true, true))
                    {
                        cs.setFont(font, 50);
                        cs.beginText();
                        cs.showText(" ");
                        cs.endText();
                    }
                }
                doc.setAllSecurityToBeRemoved(true);
                doc.save(baos);
            }
            catch (Throwable t)
            {
                // a file that cannot be loaded, modified or saved is skipped silently
                continue;
            }
            // renderings of pages 0 to 2 of the saved and reloaded document
            BufferedImage bim1b;
            BufferedImage bim2b;
            BufferedImage bim3b;
            try (PDDocument doc = Loader.loadPDF(baos.toByteArray()))
            {
                PDFRenderer r = new PDFRenderer(doc);
                bim1b = r.renderImage(0);
                bim2b = r.renderImage(1);
                bim3b = r.renderImage(2);
                // the casts throw ClassCastException for images not backed by a
                // DataBufferInt; that is handled by the catch block below
                boolean b1 = compareDatabuffer((DataBufferInt) 
bim1a.getRaster().getDataBuffer(), (DataBufferInt) 
bim1b.getRaster().getDataBuffer());
                boolean b2 = compareDatabuffer((DataBufferInt) 
bim2a.getRaster().getDataBuffer(), (DataBufferInt) 
bim2b.getRaster().getDataBuffer());
                boolean b3 = compareDatabuffer((DataBufferInt) 
bim3a.getRaster().getDataBuffer(), (DataBufferInt) 
bim3b.getRaster().getDataBuffer());
                if (!b1 || !b2 || !b3)
                {
                    // at least one page renders differently after saving: keep the saved file
                    System.err.println("bad rendering: " + file.getName() + " 
different");
                    try (OutputStream os = 
Files.newOutputStream(Paths.get("XXXXXXX/bad-" + file.getName())))
                    {
                        os.write(baos.toByteArray());
                    }
                }
            }
            catch (Throwable t)
            {
                // reloading, rendering or comparing the saved document failed:
                // report it and keep the saved file
                t.printStackTrace();
                System.err.println("bad exception: " + file.getName() + ": " + 
t.getClass().getSimpleName());
                try (OutputStream os = 
Files.newOutputStream(Paths.get("XXXXXXXXXX/bad-" + file.getName())))
                {
                    os.write(baos.toByteArray());
                }
            }
        }
    }

    /**
     * Compares the int arrays of two data buffers element by element.
     *
     * For non-null arrays this gives the same result as Arrays.equals(int[], int[]).
     *
     * @param b1 first buffer
     * @param b2 second buffer
     * @return true if both arrays have the same length and equal values at every index
     */
    private static boolean compareDatabuffer(DataBufferInt b1, DataBufferInt b2)
    {
        int[] data1 = b1.getData();
        int[] data2 = b2.getData();
        if (data1.length != data2.length)
        {
            return false;
        }
        for (int i = 0; i < data1.length; ++i)
        {
            if (data1[i] != data2[i])
            {
                return false;
            }
        }
        return true;
    }
}
{code}

> COSStream becomes COSDictionary after save — shared XObject reference 
> replaced by Font
> --------------------------------------------------------------------------------------
>
>                 Key: PDFBOX-6194
>                 URL: https://issues.apache.org/jira/browse/PDFBOX-6194
>             Project: PDFBox
>          Issue Type: Bug
>          Components: PDModel
>    Affects Versions: 3.0.7 PDFBox
>         Environment: Windows Server 2016, Java 21, PDFBox 3.0.7
>            Reporter: HABA
>            Priority: Major
>         Attachments: 000025.pdf, bad-000025.pdf, 
> image-2026-04-20-12-33-11-057.png, image-2026-04-20-13-52-20-247.png, 
> image-2026-04-20-13-52-44-302.png, screenshot-1.png
>
>
> Hi,
> `document.save()` corrupts an `/XObject` on page 3 of a 3-page PDF.
> Before save:
> - `Obj5` = `COSStream` (ImageMask)
> After save:
> - `Obj5` = `COSDictionary` (Courier font)
> Pages 1–2 are unaffected. All pages share the same indirect XObject refs 
> (`Obj4`, `Obj5`).
> Flow:
> - load PDF
> - render pages via `PDFRenderer.renderImageWithDPI()`
> - append invisible OCR text using `PDPageContentStream` (AppendMode.APPEND, 
> Courier)
> - save document → corruption occurs
> Result:
> java.io.IOException: Unexpected object type: COSDictionary
>  
> Reproduced consistently on:
>  * Windows Server 2016, Java 21, PDFBox 3.0.7
> Not reproducible on:
>  * Windows 11, Java 21 (same code + input)
> Likely related to shared indirect XObject being overwritten during save.
> Cannot share original PDF (confidential), but can test with synthetic 
> reproducer if needed.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to