[
https://issues.apache.org/jira/browse/PDFBOX-6194?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=18074853#comment-18074853
]
Tilman Hausherr edited comment on PDFBOX-6194 at 4/20/26 6:58 PM:
------------------------------------------------------------------
Good file: [^000025.pdf] bad result file: [^bad-000025.pdf] (one content
stream is null)
code used:
{code:java}
public class PDFBOX6194
{
public static final PDType1Font font = new
PDType1Font(Standard14Fonts.FontName.COURIER);
public static void main(String[] args) throws IOException
{
String dirName = "XXXXXXXXXXXX";
File dir = new File(dirName);
File files[] = dir.listFiles((File __, String name) ->
name.endsWith(".pdf"));
for (File file : files)
{
System.out.println("processing file: " + file.getName());
BufferedImage bim1a;
BufferedImage bim2a;
BufferedImage bim3a;
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try (PDDocument doc = Loader.loadPDF(file))
{
PDFRenderer r = new PDFRenderer(doc);
if (doc.getNumberOfPages() < 3)
{
continue;
}
try
{
bim1a = r.renderImage(0);
bim2a = r.renderImage(1);
bim3a = r.renderImage(2);
}
catch (Throwable t)
{
continue;
}
for (int p = 0; p < 3; ++p)
{
try (PDPageContentStream cs = new PDPageContentStream(doc,
doc.getPage(p), AppendMode.APPEND, true, true))
{
cs.setFont(font, 50);
cs.beginText();
cs.showText(" ");
cs.endText();
}
}
doc.setAllSecurityToBeRemoved(true);
doc.save(baos);
}
catch (Throwable t)
{
continue;
}
BufferedImage bim1b;
BufferedImage bim2b;
BufferedImage bim3b;
try (PDDocument doc = Loader.loadPDF(baos.toByteArray()))
{
PDFRenderer r = new PDFRenderer(doc);
bim1b = r.renderImage(0);
bim2b = r.renderImage(1);
bim3b = r.renderImage(2);
boolean b1 = Arrays.equals(((DataBufferInt)
bim1a.getRaster().getDataBuffer()).getData(), ((DataBufferInt)
bim1b.getRaster().getDataBuffer()).getData());
boolean b2 = Arrays.equals(((DataBufferInt)
bim2a.getRaster().getDataBuffer()).getData(), ((DataBufferInt)
bim2b.getRaster().getDataBuffer()).getData());
boolean b3 = Arrays.equals(((DataBufferInt)
bim3a.getRaster().getDataBuffer()).getData(), ((DataBufferInt)
bim3b.getRaster().getDataBuffer()).getData());
if (!b1 || !b2 || !b3)
{
System.err.println("bad rendering: " + file.getName() + "
different");
try (OutputStream os =
Files.newOutputStream(Paths.get("XXXXXXX/bad-" + file.getName())))
{
os.write(baos.toByteArray());
}
}
}
catch (Throwable t)
{
t.printStackTrace();
System.err.println("bad exception: " + file.getName() + ": " +
t.getClass().getSimpleName());
try (OutputStream os =
Files.newOutputStream(Paths.get("XXXXXXXXXX/bad-" + file.getName())))
{
os.write(baos.toByteArray());
}
}
}
}
}
{code}
was (Author: tilman):
Good file: [^000025.pdf] bad result file: [^bad-000025.pdf] (one content
stream is null)
code used:
{code:java}
public class PDFBOX6194
{
public static final PDType1Font font = new
PDType1Font(Standard14Fonts.FontName.COURIER);
public static void main(String[] args) throws IOException
{
String dirName = "XXXXXXXXXXXX";
File dir = new File(dirName);
File files[] = dir.listFiles((File __, String name) ->
name.endsWith(".pdf"));
for (File file : files)
{
System.out.println("processing file: " + file.getName());
BufferedImage bim1a;
BufferedImage bim2a;
BufferedImage bim3a;
ByteArrayOutputStream baos = new ByteArrayOutputStream();
try (PDDocument doc = Loader.loadPDF(file))
{
PDFRenderer r = new PDFRenderer(doc);
if (doc.getNumberOfPages() < 3)
{
continue;
}
try
{
bim1a = r.renderImage(0);
bim2a = r.renderImage(1);
bim3a = r.renderImage(2);
}
catch (Throwable t)
{
continue;
}
for (int p = 0; p < 3; ++p)
{
try (PDPageContentStream cs = new PDPageContentStream(doc,
doc.getPage(p), AppendMode.APPEND, true, true))
{
cs.setFont(font, 50);
cs.beginText();
cs.showText(" ");
cs.endText();
}
}
doc.setAllSecurityToBeRemoved(true);
doc.save(baos);
}
catch (Throwable t)
{
continue;
}
BufferedImage bim1b;
BufferedImage bim2b;
BufferedImage bim3b;
try (PDDocument doc = Loader.loadPDF(baos.toByteArray()))
{
PDFRenderer r = new PDFRenderer(doc);
bim1b = r.renderImage(0);
bim2b = r.renderImage(1);
bim3b = r.renderImage(2);
boolean b1 = compareDatabuffer((DataBufferInt)
bim1a.getRaster().getDataBuffer(), (DataBufferInt)
bim1b.getRaster().getDataBuffer());
boolean b2 = compareDatabuffer((DataBufferInt)
bim2a.getRaster().getDataBuffer(), (DataBufferInt)
bim2b.getRaster().getDataBuffer());
boolean b3 = compareDatabuffer((DataBufferInt)
bim3a.getRaster().getDataBuffer(), (DataBufferInt)
bim3b.getRaster().getDataBuffer());
if (!b1 || !b2 || !b3)
{
System.err.println("bad rendering: " + file.getName() + "
different");
try (OutputStream os =
Files.newOutputStream(Paths.get("XXXXXXX/bad-" + file.getName())))
{
os.write(baos.toByteArray());
}
}
}
catch (Throwable t)
{
t.printStackTrace();
System.err.println("bad exception: " + file.getName() + ": " +
t.getClass().getSimpleName());
try (OutputStream os =
Files.newOutputStream(Paths.get("XXXXXXXXXX/bad-" + file.getName())))
{
os.write(baos.toByteArray());
}
}
}
}
private static boolean compareDatabuffer(DataBufferInt b1, DataBufferInt b2)
{
int[] data1 = b1.getData();
int[] data2 = b2.getData();
if (data1.length != data2.length)
{
return false;
}
for (int i = 0; i < data1.length; ++i)
{
if (data1[i] != data2[i])
{
return false;
}
}
return true;
}
}
{code}
> COSStream becomes COSDictionary after save — shared XObject reference
> replaced by Font
> --------------------------------------------------------------------------------------
>
> Key: PDFBOX-6194
> URL: https://issues.apache.org/jira/browse/PDFBOX-6194
> Project: PDFBox
> Issue Type: Bug
> Components: PDModel
> Affects Versions: 3.0.7 PDFBox
> Environment: Windows Server 2016, Java 21, PDFBox 3.0.7
> Reporter: HABA
> Priority: Major
> Attachments: 000025.pdf, bad-000025.pdf,
> image-2026-04-20-12-33-11-057.png, image-2026-04-20-13-52-20-247.png,
> image-2026-04-20-13-52-44-302.png, screenshot-1.png
>
>
> Hi,
> `document.save()` corrupts an `/XObject` on page 3 of a 3-page PDF.
> Before save:
> - `Obj5` = `COSStream` (ImageMask)
> After save:
> - `Obj5` = `COSDictionary` (Courier font)
> Pages 1–2 are unaffected. All pages share the same indirect XObject refs
> (`Obj4`, `Obj5`).
> Flow:
> - load PDF
> - render pages via `PDFRenderer.renderImageWithDPI()`
> - append invisible OCR text using `PDPageContentStream` (AppendMode.APPEND,
> Courier)
> - save document → corruption occurs
> Result:
> java.io.IOException: Unexpected object type: COSDictionary
>
> Reproduced consistently on:
> * Windows Server 2016, Java 21, PDFBox 3.0.7
> Not reproducible on:
> * Windows 11, Java 21 (same code + input)
> Likely related to shared indirect XObject being overwritten during save.
> Cannot share original PDF (confidential), but can test with synthetic
> reproducer if needed.
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]