Problem when merging XFDF annotations containing &

Kai Keggenhoff Fri, 16 Dec 2016 00:55:02 -0800

Hello,

I'm experiencing a problem when I'm merging PDFs with freetext annotations read 
from XFDF strings.
Whenever there's a "freetext" annotation with a "&" in the text (submitted as 
"&amp;" by Adobe Reader in the XFDF), merging this annotation will lead to a 
corrupted display in the resulting PDF.
The corruption manifests itself as follows: instead of the actual text, the 
annotation shows the raw markup ("<body ...") from the XML.


However, if I crudely replace the "&amp;" with "&amp;amp;" in the XFDF prior to 
merging, the annotation is displayed correctly.
I have tried to read the XFDF via FDFDocument.loadXFDF(new 
ByteArrayInputStream(xfdf.getBytes("UTF-8"))) instead of parsing it to a 
document, but this did not change anything.

Tested against PDFBox 2.0.3 and 2.0.4 as of this morning.

My sample code is at the end of this mail. For the input file "demo.pdf", any 
portrait A4/letter format PDF should work.

Kind regards,

Kai Keggenhoff





import java.io.File;
import java.io.StringReader;
import java.util.List;
import javax.xml.parsers.*;
import javax.xml.transform.*;
import javax.xml.transform.dom.*;
import javax.xml.transform.stream.*;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.interactive.annotation.PDAnnotation;
import org.apache.pdfbox.pdmodel.fdf.FDFAnnotation;
import org.apache.pdfbox.pdmodel.fdf.FDFDocument;
import org.xml.sax.InputSource;

public class MergeTest {

    public MergeTest() {
    }

    public void mergePDFandXFDF(File pdf, String xfdf, String outputfilename) {

        FDFDocument fdf_doc = null;
        PDDocument pdf_doc = null;
        try {
            org.w3c.dom.Document xfdf_doc = 
DocumentBuilderFactory.newInstance().newDocumentBuilder().parse(new 
InputSource(new StringReader(xfdf)));

            fdf_doc = new FDFDocument(xfdf_doc);
            pdf_doc = PDDocument.load(pdf);

            List<FDFAnnotation> xfdfAnnotations = 
fdf_doc.getCatalog().getFDF().getAnnotations();
            for (FDFAnnotation xfdfAnnotation : xfdfAnnotations) {
                PDAnnotation a = 
PDAnnotation.createAnnotation(xfdfAnnotation.getCOSObject());
                a.setReadOnly(true);

                PDPage page = pdf_doc.getPage(xfdfAnnotation.getPage());
                List<PDAnnotation> pageAnnotations = page.getAnnotations();
                pageAnnotations.add(a);
            }

            File resultFile = new File(outputfilename);
            pdf_doc.save(resultFile);
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            if (fdf_doc!=null) try { fdf_doc.close(); } catch (Exception e) { }
            if (pdf_doc!=null) try { pdf_doc.close(); } catch (Exception e) { }
        }
    }

    public static void main(String argv[]) {

        String xfdf = "<?xml version=\"1.0\" encoding=\"UTF-8\"?><xfdf 
xmlns=\"http://ns.adobe.com/xfdf/\"; xml:space=\"preserve\"><annots><freetext 
color=\"#FFFFFF\" creationdate=\"D:20161216082154+01'00'\" flags=\"print\" 
date=\"D:20161216082219+01'00'\" page=\"0\" 
rect=\"342.148376,763.152039,402.267456,807.962036\" subject=\"Textfeld\" 
title=\"keggenhoff\"><contents-richtext><body 
xmlns=\"http://www.w3.org/1999/xhtml\"; 
xmlns:xfa=\"http://www.xfa.org/schema/xfa-data/1.0/\"; 
xfa:APIVersion=\"Acrobat:15.20.0\" xfa:spec=\"2.0.2\" 
style=\"font-size:12.0pt;text-align:left;color:#FF0000;font-weight:normal;font-style:normal;font-family:Helvetica,sans-serif;font-stretch:normal\"><p
 dir=\"ltr\"><span style=\"font-family:Helvetica\">A &amp; B&#xD;1 &amp; 2 
&amp; 3&#xD;4 &amp; 5 &amp; 
6</span></p></body></contents-richtext><defaultappearance>0.898 0.1333 0.2157 
rg /Helv 12 Tf</defaultappearance><defaultstyle>font: Helvetica,sans-serif 
12.0pt; text-align:left; color:#E52237 </defaultstyle></freetext></annots><f 
href=\"/C/Users/kegg/AppData/Local/Temp/demo.pdf\"/><fields><field 
name=\"submit\"/></fields><ids original=\"F285D06ECA30C5579E72B6B7AE07BC0B\" 
modified=\"EA76360AC37EFC04A7716DA16651675E\"/></xfdf>";

        File pdf = new File("demo.pdf");

        MergeTest mt = new MergeTest();
        mt.mergePDFandXFDF(pdf, xfdf, "demo_1.pdf");
        mt.mergePDFandXFDF(pdf, xfdf.replace("&amp;", "&amp;amp;"), 
"demo_2.pdf");
    }
}

Problem when merging XFDF annotations containing &

Reply via email to