[ https://issues.apache.org/jira/browse/PDFBOX-4407?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Tilman Hausherr updated PDFBOX-4407: ------------------------------------ Component/s: Utilities > ParentTree Objects do not match KArray objects after merge > ---------------------------------------------------------- > > Key: PDFBOX-4407 > URL: https://issues.apache.org/jira/browse/PDFBOX-4407 > Project: PDFBox > Issue Type: Bug > Components: Utilities > Affects Versions: 2.0.13 > Reporter: Dan Anderson > Priority: Major > Labels: StructureTree > Fix For: 2.0.14, 3.0.0 PDFBox > > Attachments: 4407.patch, reading-order-merged-bad.pdf, > reading-order-merged-good.pdf, reading-order.pdf > > > After merging tagged documents together, the second page of the resulting > document is no longer valid. When the field objects are cloned in > PDFMergerUtility, the new and old objects are stored in a map named > objMapping. This is used to replace the old references with the new > references for the acroform, k array, and annotation list. However, the > ParentTree is not updated to this new object reference. This results in the > K Array and the Parent Tree having different references to the same object. > This causes issues when using an a11y reader like JAWS, and also causes > problems displaying the tags in Adobe DC. > Here is a failing unit test that was created in PDFMergerUtilityTest to > demonstrate the issue. 
It was created using an example from W3: > https://www.w3.org/WAI/WCAG20/Techniques/working-examples/PDF3/reading-order.pdf > {code:java} > public void testStructureTreeMerge3() throws IOException > { > PDFMergerUtility pdfMergerUtility = new PDFMergerUtility(); > PDDocument src = PDDocument.load(new File(SRCDIR, "reading-order.pdf")); > PDDocument dst = PDDocument.load(new File(SRCDIR, "reading-order.pdf")); > pdfMergerUtility.appendDocument(dst, src); > src.close(); > dst.save(new File(TARGETTESTDIR, "reading-order-merged.pdf")); > dst.close(); > PDDocument doc = PDDocument.load(new File(TARGETTESTDIR, > "reading-order-merged.pdf")); > > assertTrue(checkAnnotationMatches(doc.getDocumentCatalog().getStructureTreeRoot().getKArray(), > doc.getDocumentCatalog().getAcroForm().getFields(), > (COSArray)doc.getDocumentCatalog().getStructureTreeRoot().getParentTree().getCOSObject().getDictionaryObject(COSName.NUMS))); > } > private boolean checkAnnotationMatches(COSArray kArray, List<PDField> > acroformFields, COSArray numbersArray) { > for (int i = 0; i < kArray.size(); i++) { > COSBase entry = kArray.get(i); > if (entry instanceof COSArray){ > COSArray entryAsArray = (COSArray) entry; > if (!checkAnnotationMatches(entryAsArray, acroformFields, > numbersArray)) { > return false; > } > } else if (entry instanceof COSInteger) { > //do nothing, just need to screen these out so next line doesn't > blow up > } else if (((COSObject) entry).getObject() instanceof COSDictionary){ > COSDictionary entryDictionary = (COSDictionary)((COSObject) > entry).getObject(); > if (entryDictionary.getItem(COSName.K) != null) { > COSBase kids = entryDictionary.getItem(COSName.K); > if (kids != null) { > if (kids instanceof COSInteger) { > //do nothing, don't care about marked content tags > } else if (kids instanceof COSDictionary) { > COSDictionary kidsAsDictionary = (COSDictionary) kids; > if > (!checkForMatches(kidsAsDictionary.getDictionaryObject(COSName.OBJ), > acroformFields, 
numbersArray)) { > return false; > } > } else if (kids instanceof COSArray) { > COSArray kidsAsArray = (COSArray) kids; > if (!checkAnnotationMatches(kidsAsArray, > acroformFields, numbersArray)) { > return false; > } > } > } > } else if (entryDictionary.getDictionaryObject(COSName.OBJ) != > null) { > if > (!checkForMatches(entryDictionary.getDictionaryObject(COSName.OBJ), > acroformFields, numbersArray)) { > return false; > } > } > } > } > return true; > } > private boolean checkForMatches(COSBase objectReference, List<PDField> > acroformFields, COSArray numbersArray) { > boolean result = false; > for (PDField field : acroformFields) { > if (field.getCOSObject() == objectReference && > numbersArray.indexOfObject(objectReference.getCOSObject()) > 0) { > result = true; > } > } > return result; > } > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: dev-unsubscr...@pdfbox.apache.org For additional commands, e-mail: dev-h...@pdfbox.apache.org