(tika) 01/01: TIKA-4465 -- extract javascript from name tree

tallison Fri, 15 Aug 2025 05:16:55 -0700

This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4465
in repository https://gitbox.apache.org/repos/asf/tika.git


commit 7e5733df02562b83d9e1f3a81b6183952314c97a
Author: tallison <[email protected]>
AuthorDate: Fri Aug 15 08:15:01 2025 -0400

    TIKA-4465 -- extract javascript from name tree
---
 .../main/java/org/apache/tika/metadata/PDF.java    |   6 +
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java  | 274 +++++++++++++--------
 .../org/apache/tika/parser/pdf/PDFParserTest.java  |  72 +++++-
 3 files changed, 239 insertions(+), 113 deletions(-)

diff --git a/tika-core/src/main/java/org/apache/tika/metadata/PDF.java 
b/tika-core/src/main/java/org/apache/tika/metadata/PDF.java
index b15c10383..f85218936 100644
--- a/tika-core/src/main/java/org/apache/tika/metadata/PDF.java
+++ b/tika-core/src/main/java/org/apache/tika/metadata/PDF.java
@@ -215,4 +215,10 @@ public interface PDF {
      */
     Property OCR_PAGE_COUNT = Property.externalInteger(PDF_PREFIX + 
"ocrPageCount");
 
+    /**
+     * When javascript is stored in the names tree, there's a name associated 
with that script.
+     * This is that name. When javascript is stored in an action, there is no 
name, and this
+     * metadata will not be populated.
+     */
+    Property JS_NAME = Property.internalText(PDF_PREFIX + "jsName");
 }
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
index de47f2394..5b3525488 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/parser/pdf/AbstractPDF2XHTML.java
@@ -54,6 +54,7 @@ import org.apache.pdfbox.cos.COSName;
 import org.apache.pdfbox.cos.COSObject;
 import org.apache.pdfbox.pdmodel.PDDocument;
 import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
+import org.apache.pdfbox.pdmodel.PDJavascriptNameTreeNode;
 import org.apache.pdfbox.pdmodel.PDPage;
 import org.apache.pdfbox.pdmodel.PDPageTree;
 import org.apache.pdfbox.pdmodel.common.COSObjectable;
@@ -700,92 +701,7 @@ class AbstractPDF2XHTML extends PDFTextStripper {
 
         try {
             for (PDAnnotation annotation : page.getAnnotations()) {
-                String annotationName = annotation.getAnnotationName();
-                if (annotationTypes.size() < MAX_ANNOTATION_TYPES) {
-                    if (annotationName != null) {
-                        annotationTypes.add(annotationName);
-                    } else {
-                        annotationTypes.add(NULL_STRING);
-                    }
-                }
-                String annotationSubtype = annotation.getSubtype();
-                if (annotationSubtypes.size() < MAX_ANNOTATION_TYPES) {
-                    if (annotationSubtype != null) {
-                        annotationSubtypes.add(annotationSubtype);
-                    } else {
-                        annotationSubtypes.add(NULL_STRING);
-                    }
-                }
-                if (annotation instanceof PDAnnotationFileAttachment) {
-                    PDAnnotationFileAttachment fann = 
(PDAnnotationFileAttachment) annotation;
-                    String subtype = "annotationFileAttachment";
-                    AttributesImpl attributes = new AttributesImpl();
-                    attributes.addAttribute("", "source", "source", "CDATA", 
subtype);
-                    processDocOnAction("", subtype, fann.getFile(), 
attributes);
-                } else if (annotation instanceof PDAnnotationWidget) {
-                    handleWidget((PDAnnotationWidget) annotation);
-                } else {
-                    if (annotationSubtype == null) {
-                        annotationSubtype = "unknown";
-                    } else if (annotationSubtype.equals(THREE_D) ||
-                            annotation.getCOSObject().containsKey(THREE_DD)) {
-                        //To make this stricter, we could get the 3DD stream 
object and see if the
-                        //subtype is U3D or PRC or model/ (prefix for model 
mime type)
-                        metadata.set(PDF.HAS_3D, true);
-                        num3DAnnotations++;
-                    }
-                    for (COSDictionary fileSpec : 
findFileSpecs(annotation.getCOSObject())) {
-                        AttributesImpl attributes = new AttributesImpl();
-                        attributes.addAttribute("", "source", "source", 
"CDATA", annotationSubtype);
-                        processDocOnAction("", annotationSubtype, 
createFileSpecification(fileSpec),
-                                attributes);
-                    }
-                }
-                // TODO: remove once PDFBOX-1143 is fixed:
-                if (config.isExtractAnnotationText()) {
-                    PDActionURI uri = getActionURI(annotation);
-                    if (uri != null) {
-                        String link = uri.getURI();
-                        if (link != null && !link.isBlank()) {
-                            xhtml.startElement("div", "class", "annotation");
-                            xhtml.startElement("a", "href", link);
-                            xhtml.characters(link);
-                            xhtml.endElement("a");
-                            xhtml.endElement("div");
-                        }
-                    }
-
-                    if (annotation instanceof PDAnnotationMarkup) {
-                        PDAnnotationMarkup annotationMarkup = 
(PDAnnotationMarkup) annotation;
-                        String title = annotationMarkup.getTitlePopup();
-                        String subject = annotationMarkup.getSubject();
-                        String contents = annotationMarkup.getContents();
-                        // TODO: maybe also annotationMarkup.getRichContents()?
-                        if (title != null || subject != null || contents != 
null) {
-                            xhtml.startElement("div", "class", "annotation");
-
-                            if (title != null) {
-                                xhtml.startElement("div", "class", 
"annotationTitle");
-                                xhtml.characters(title);
-                                xhtml.endElement("div");
-                            }
-
-                            if (subject != null) {
-                                xhtml.startElement("div", "class", 
"annotationSubject");
-                                xhtml.characters(subject);
-                                xhtml.endElement("div");
-                            }
-
-                            if (contents != null) {
-                                xhtml.startElement("div", "class", 
"annotationContents");
-                                xhtml.characters(contents);
-                                xhtml.endElement("div");
-                            }
-
-                            xhtml.endElement("div");
-                        }
-                    }
-                }
+                processPageAnnotation(annotation);
             }
             if (config.getOcrStrategy() == 
PDFParserConfig.OCR_STRATEGY.OCR_AND_TEXT_EXTRACTION) {
                 doOCROnCurrentPage(page, OCR_AND_TEXT_EXTRACTION);
@@ -835,6 +751,95 @@ class AbstractPDF2XHTML extends PDFTextStripper {
         }
     }
 
+    /**
+     * Processes one page annotation. Records the annotation's name and subtype (each
+     * collection is capped at {@code MAX_ANNOTATION_TYPES}), dispatches file attachments,
+     * widgets and embedded file specifications to their handlers, flags 3D annotations in
+     * the metadata and, if annotation text extraction is enabled in the config, writes
+     * link and markup text to the xhtml handler.
+     *
+     * @param annotation the annotation to process
+     */
+    private void processPageAnnotation(PDAnnotation annotation)
+            throws TikaException, IOException, SAXException {
+        String name = annotation.getAnnotationName();
+        if (annotationTypes.size() < MAX_ANNOTATION_TYPES) {
+            annotationTypes.add(name == null ? NULL_STRING : name);
+        }
+        String subtype = annotation.getSubtype();
+        if (annotationSubtypes.size() < MAX_ANNOTATION_TYPES) {
+            annotationSubtypes.add(subtype == null ? NULL_STRING : subtype);
+        }
+
+        if (annotation instanceof PDAnnotationFileAttachment) {
+            PDAnnotationFileAttachment fileAttachment = (PDAnnotationFileAttachment) annotation;
+            String source = "annotationFileAttachment";
+            AttributesImpl attachmentAttrs = new AttributesImpl();
+            attachmentAttrs.addAttribute("", "source", "source", "CDATA", source);
+            processDocOnAction("", source, fileAttachment.getFile(), attachmentAttrs);
+        } else if (annotation instanceof PDAnnotationWidget) {
+            handleWidget((PDAnnotationWidget) annotation);
+        } else {
+            if (subtype == null) {
+                subtype = "unknown";
+            } else if (subtype.equals(THREE_D) ||
+                    annotation.getCOSObject().containsKey(THREE_DD)) {
+                //To make this stricter, we could get the 3DD stream object and see if the
+                //subtype is U3D or PRC or model/ (prefix for model mime type)
+                metadata.set(PDF.HAS_3D, true);
+                num3DAnnotations++;
+            }
+            for (COSDictionary spec : findFileSpecs(annotation.getCOSObject())) {
+                AttributesImpl specAttrs = new AttributesImpl();
+                specAttrs.addAttribute("", "source", "source", "CDATA", subtype);
+                processDocOnAction("", subtype, createFileSpecification(spec), specAttrs);
+            }
+        }
+
+        // TODO: remove once PDFBOX-1143 is fixed:
+        if (!config.isExtractAnnotationText()) {
+            return;
+        }
+
+        PDActionURI uriAction = getActionURI(annotation);
+        String link = (uriAction == null) ? null : uriAction.getURI();
+        if (link != null && !link.isBlank()) {
+            xhtml.startElement("div", "class", "annotation");
+            xhtml.startElement("a", "href", link);
+            xhtml.characters(link);
+            xhtml.endElement("a");
+            xhtml.endElement("div");
+        }
+
+        if (!(annotation instanceof PDAnnotationMarkup)) {
+            return;
+        }
+        PDAnnotationMarkup markup = (PDAnnotationMarkup) annotation;
+        String title = markup.getTitlePopup();
+        String subject = markup.getSubject();
+        String contents = markup.getContents();
+        // TODO: maybe also annotationMarkup.getRichContents()?
+        if (title == null && subject == null && contents == null) {
+            //nothing to write for this markup annotation
+            return;
+        }
+
+        xhtml.startElement("div", "class", "annotation");
+        if (title != null) {
+            xhtml.startElement("div", "class", "annotationTitle");
+            xhtml.characters(title);
+            xhtml.endElement("div");
+        }
+        if (subject != null) {
+            xhtml.startElement("div", "class", "annotationSubject");
+            xhtml.characters(subject);
+            xhtml.endElement("div");
+        }
+        if (contents != null) {
+            xhtml.startElement("div", "class", "annotationContents");
+            xhtml.characters(contents);
+            xhtml.endElement("div");
+        }
+        xhtml.endElement("div");
+    }
+
     private List<COSDictionary> findFileSpecs(COSDictionary cosDict) {
         Set<COSName> types = new HashSet<>();
         types.add(COSName.FILESPEC);
@@ -906,6 +911,7 @@ class AbstractPDF2XHTML extends PDFTextStripper {
     protected void startDocument(PDDocument pdf) throws IOException {
         try {
             xhtml.startDocument();
+            extractJavaScript(pdf);
             try {
                 
handleDestinationOrAction(pdf.getDocumentCatalog().getOpenAction(),
                         ActionTrigger.DOCUMENT_OPEN);
@@ -918,6 +924,57 @@ class AbstractPDF2XHTML extends PDFTextStripper {
         }
     }
 
+    /**
+     * Extracts JavaScript stored in the document catalog's names tree: first the scripts
+     * held directly on the root JavaScript node, then those in its kids.
+     * Does nothing unless action extraction is enabled in the config, or if the document
+     * has no catalog, no names dictionary or no JavaScript names tree.
+     *
+     * @param pdf the document to inspect
+     * @throws SAXException propagated from the script handlers
+     */
+    private void extractJavaScript(PDDocument pdf) throws SAXException {
+        if (!config.isExtractActions()) {
+            return;
+        }
+        //look each level up once rather than re-resolving it for every null check
+        PDDocumentCatalog catalog = pdf.getDocumentCatalog();
+        if (catalog == null || catalog.getNames() == null) {
+            return;
+        }
+        PDJavascriptNameTreeNode root = catalog.getNames().getJavaScript();
+        if (root == null) {
+            return;
+        }
+        try {
+            //getNames() is null when the root has no /Names entry (e.g. it only has /Kids);
+            //that must not stop us from descending into the kids
+            Map<String, PDActionJavaScript> rootScripts = root.getNames();
+            if (rootScripts != null) {
+                addJavaScript(rootScripts);
+            }
+            //the kids of the root are at depth 1
+            processJavascriptNameTreeNodeKids(root.getKids(), 1);
+        } catch (IOException e) {
+            //swallow -- NOTE(review): looks like deliberate best effort; consider recording
+            //the exception with whatever mechanism the rest of this class uses
+        }
+    }
+
+    /**
+     * Hands every non-blank script of one names-tree node to
+     * {@code processJavaScriptAction}, tagged with the "namesTree" trigger and its name.
+     *
+     * @param pdActionJavaScriptMap script name to action for a single node; may be
+     *                              {@code null}, which is what a node without a /Names
+     *                              entry yields
+     * @throws IOException  propagated from reading or emitting a script
+     * @throws SAXException propagated from the xhtml handler
+     */
+    private void addJavaScript(Map<String, PDActionJavaScript> pdActionJavaScriptMap)
+            throws IOException, SAXException {
+        if (pdActionJavaScriptMap == null) {
+            return;
+        }
+        for (Map.Entry<String, PDActionJavaScript> e : pdActionJavaScriptMap.entrySet()) {
+            PDActionJavaScript jsAction = e.getValue();
+            //skip an empty entry instead of returning, so that the remaining
+            //scripts in this node are still extracted
+            if (jsAction == null || StringUtils.isBlank(jsAction.getAction())) {
+                continue;
+            }
+            AttributesImpl attributes = new AttributesImpl();
+            addNonNullAttribute("trigger", "namesTree", attributes);
+            addNonNullAttribute("type", jsAction.getClass().getSimpleName(), attributes);
+            processJavaScriptAction("NAMES_TREE", e.getKey(), jsAction, attributes);
+        }
+    }
+
+    /**
+     * Recursively walks the kids of a JavaScript names-tree node and extracts the scripts
+     * found on each of them.
+     *
+     * @param kids  child nodes to visit; {@code null} is treated as "no kids"
+     * @param depth depth of {@code kids} below the root; once this exceeds
+     *              {@code MAX_RECURSION_DEPTH} the walk stops silently
+     * @throws IOException  propagated from reading a node or emitting a script
+     * @throws SAXException propagated from the xhtml handler
+     */
+    private void processJavascriptNameTreeNodeKids(
+            List<PDNameTreeNode<PDActionJavaScript>> kids, int depth)
+            throws IOException, SAXException {
+        if (kids == null) {
+            return;
+        }
+        if (depth > MAX_RECURSION_DEPTH) {
+            //hit max recursion
+            //return silently
+            return;
+        }
+        for (PDNameTreeNode<PDActionJavaScript> kid : kids) {
+            //intermediate nodes usually carry only /Kids, in which case getNames() is null
+            Map<String, PDActionJavaScript> scripts = kid.getNames();
+            if (scripts != null) {
+                addJavaScript(scripts);
+            }
+            processJavascriptNameTreeNodeKids(kid.getKids(), depth + 1);
+        }
+    }
+
     private void handleDestinationOrAction(PDDestinationOrAction action,
                                            ActionTrigger actionTrigger)
             throws IOException, SAXException, TikaException {
@@ -952,25 +1009,7 @@ class AbstractPDF2XHTML extends PDFTextStripper {
             PDActionRemoteGoTo remoteGoTo = (PDActionRemoteGoTo) action;
             processDocOnAction("", "", remoteGoTo.getFile(), attributes);
         } else if (action instanceof PDActionJavaScript) {
-            PDActionJavaScript jsAction = (PDActionJavaScript) action;
-            Metadata m = new Metadata();
-            m.set(Metadata.CONTENT_TYPE, "application/javascript");
-            m.set(Metadata.CONTENT_ENCODING, 
StandardCharsets.UTF_8.toString());
-            m.set(PDF.ACTION_TRIGGER, actionTrigger.toString());
-            m.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
-                    TikaCoreProperties.EmbeddedResourceType.MACRO.name());
-            String js = jsAction.getAction();
-            js = (js == null) ? "" : js;
-            if (embeddedDocumentExtractor.shouldParseEmbedded(m)) {
-                try (TikaInputStream tis = 
TikaInputStream.get(js.getBytes(StandardCharsets.UTF_8))) {
-                    embeddedDocumentExtractor.parseEmbedded(tis, xhtml, m, 
true);
-                }
-            }
-            addNonNullAttribute("class", "javascript", attributes);
-            addNonNullAttribute("type", jsAction.getType(), attributes);
-            addNonNullAttribute("subtype", jsAction.getSubType(), attributes);
-            xhtml.startElement("div", attributes);
-            xhtml.endElement("div");
+            processJavaScriptAction(actionTrigger.name(), null, 
(PDActionJavaScript) action, attributes);
         /*} else if (action instanceof PDActionSubmitForm) {
             PDActionSubmitForm submitForm = (PDActionSubmitForm) action;
             //these are typically urls, not actual file specification
@@ -982,6 +1021,31 @@ class AbstractPDF2XHTML extends PDFTextStripper {
         }
     }
 
+    /**
+     * Emits one JavaScript action: offers the script text (UTF-8) to the embedded document
+     * extractor as a MACRO resource and then writes an empty marker div carrying
+     * {@code attrs} plus the class/type/subtype attributes.
+     *
+     * @param trigger      value stored under {@code PDF.ACTION_TRIGGER}
+     * @param jsActionName name of the script; only stored when not blank
+     * @param jsAction     the action holding the script
+     * @param attrs        attributes for the marker div; this method adds to them
+     */
+    private void processJavaScriptAction(String trigger, String jsActionName,
+                                         PDActionJavaScript jsAction, AttributesImpl attrs)
+            throws IOException, SAXException {
+        Metadata scriptMetadata = new Metadata();
+        scriptMetadata.set(Metadata.CONTENT_TYPE, "application/javascript");
+        scriptMetadata.set(Metadata.CONTENT_ENCODING, StandardCharsets.UTF_8.toString());
+        scriptMetadata.set(PDF.ACTION_TRIGGER, trigger);
+        scriptMetadata.set(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE,
+                TikaCoreProperties.EmbeddedResourceType.MACRO.name());
+        if (!StringUtils.isBlank(jsActionName)) {
+            scriptMetadata.set(PDF.JS_NAME, jsActionName);
+        }
+
+        String script = jsAction.getAction();
+        if (script == null) {
+            //a missing script is treated as an empty one
+            script = "";
+        }
+        if (embeddedDocumentExtractor.shouldParseEmbedded(scriptMetadata)) {
+            byte[] utf8 = script.getBytes(StandardCharsets.UTF_8);
+            try (TikaInputStream tis = TikaInputStream.get(utf8)) {
+                embeddedDocumentExtractor.parseEmbedded(tis, xhtml, scriptMetadata, true);
+            }
+        }
+
+        addNonNullAttribute("class", "javascript", attrs);
+        addNonNullAttribute("type", jsAction.getType(), attrs);
+        addNonNullAttribute("subtype", jsAction.getSubType(), attrs);
+        xhtml.startElement("div", attrs);
+        xhtml.endElement("div");
+    }
+
     @Override
     protected void endDocument(PDDocument pdf) throws IOException {
         try {
diff --git 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
index 49b0042cb..52bf80129 100644
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/parser/pdf/PDFParserTest.java
@@ -25,10 +25,14 @@ import static org.junit.jupiter.api.Assertions.fail;
 import static org.junit.jupiter.api.Assumptions.assumeTrue;
 
 import java.io.InputStream;
+import java.util.HashSet;
 import java.util.List;
 import java.util.Locale;
+import java.util.Set;
 import java.util.logging.Level;
 import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 import org.junit.jupiter.api.AfterAll;
 import org.junit.jupiter.api.BeforeAll;
@@ -55,6 +59,7 @@ import org.apache.tika.parser.ocr.TesseractOCRParser;
 import org.apache.tika.parser.xml.XMLProfiler;
 import org.apache.tika.sax.BasicContentHandlerFactory;
 import org.apache.tika.sax.RecursiveParserWrapperHandler;
+import org.apache.tika.utils.StringUtils;
 
 public class PDFParserTest extends TikaTest {
     public static final MediaType TYPE_TEXT = MediaType.TEXT_PLAIN;
@@ -251,7 +256,7 @@ public class PDFParserTest extends TikaTest {
 
     @Test
     public void testEmbeddedDocsWithOCROnly() throws Exception {
-        assumeTrue(canRunOCR(), "can run OCR");
+        assumeTrue(canRunOCR(), "can't run OCR");
         //test default is "auto"
         assertEquals(PDFParserConfig.OCR_STRATEGY.AUTO, new 
PDFParserConfig().getOcrStrategy());
         testStrategy(null);
@@ -367,7 +372,7 @@ public class PDFParserTest extends TikaTest {
 
     @Test
     public void testJBIG2OCROnly() throws Exception {
-        assumeTrue(canRunOCR(), "can run OCR");
+        assumeTrue(canRunOCR(), "can't run OCR");
         PDFParserConfig config = new PDFParserConfig();
         config.setOcrStrategy(PDFParserConfig.OCR_STRATEGY.OCR_ONLY);
         ParseContext context = new ParseContext();
@@ -379,7 +384,7 @@ public class PDFParserTest extends TikaTest {
 
     @Test
     public void testJPEG2000() throws Exception {
-        assumeTrue(canRunOCR(), "can run OCR");
+        assumeTrue(canRunOCR(), "can't run OCR");
         PDFParserConfig config = new PDFParserConfig();
         config.setOcrStrategy(PDFParserConfig.OCR_STRATEGY.OCR_ONLY);
         ParseContext context = new ParseContext();
@@ -391,7 +396,7 @@ public class PDFParserTest extends TikaTest {
 
     @Test
     public void testOCRAutoMode() throws Exception {
-        assumeTrue(canRunOCR(), "can run OCR");
+        assumeTrue(canRunOCR(), "can't run OCR");
 
         //default
         assertContains("Happy New Year", getXML("testOCR.pdf").xml);
@@ -410,7 +415,7 @@ public class PDFParserTest extends TikaTest {
 
     @Test
     public void testOCRNoText() throws Exception {
-        assumeTrue(canRunOCR(), "can run OCR");
+        assumeTrue(canRunOCR(), "can't run OCR");
         PDFParserConfig config = new PDFParserConfig();
         
config.setOcrRenderingStrategy(PDFParserConfig.OCR_RENDERING_STRATEGY.ALL);
         config.setOcrStrategy(PDFParserConfig.OCR_STRATEGY.OCR_ONLY);
@@ -433,7 +438,7 @@ public class PDFParserTest extends TikaTest {
         //TIKA-2970 -- make sure that configurations set on the 
TesseractOCRParser
         //make it through to when the TesseractOCRParser is called via
         //the PDFParser
-        assumeTrue(canRunOCR(), "can run OCR");
+        assumeTrue(canRunOCR(), "can't run OCR");
 
         //via the config, tesseract should skip this file because it is too 
large
         try (InputStream is = getResourceAsStream(
@@ -458,8 +463,8 @@ public class PDFParserTest extends TikaTest {
     public void testMuPDFInOCR() throws Exception {
         //TODO -- need to add "rendered by" to confirm that mutool was 
actually called
         //and that there wasn't some backoff to PDFBox the PDFParser
-        assumeTrue(canRunOCR(), "can run OCR");
-        assumeTrue(hasMuPDF(), "has mupdf");
+        assumeTrue(canRunOCR(), "can't run OCR");
+        assumeTrue(hasMuPDF(), "does not have mupdf");
         try (InputStream is = getResourceAsStream(
                 "/configs/tika-rendering-mupdf-config.xml")) {
             assertNotNull(is);
@@ -508,4 +513,55 @@ public class PDFParserTest extends TikaTest {
         
assertEquals(TikaCoreProperties.EmbeddedResourceType.VERSION.toString(),
                 
metadataList.get(2).get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE));
     }
+
+    /**
+     * A script stored in the names tree must show up both as a marker div in the
+     * container document and as an embedded MACRO document with its name.
+     */
+    @Test
+    public void testJavascriptInNamesTreeOne() throws Exception {
+        PDFParserConfig pdfConfig = new PDFParserConfig();
+        pdfConfig.setExtractActions(true);
+        ParseContext context = new ParseContext();
+        context.set(PDFParserConfig.class, pdfConfig);
+
+        List<Metadata> metadataList = getRecursiveMetadata("testPDFPackage.pdf", context, true);
+        assertEquals(4, metadataList.size());
+
+        //look for markup in primary document
+        String containerXhtml = metadataList.get(0).get(TikaCoreProperties.TIKA_CONTENT);
+        Matcher divMatcher = Pattern.compile("<div ([^>]{0,1000})>").matcher(containerXhtml);
+        int jsDivs = 0;
+        while (divMatcher.find()) {
+            String divAttrs = divMatcher.group(1);
+            if (!divAttrs.contains("trigger=\"namesTree\"")) {
+                continue;
+            }
+            assertContains("type=\"PDActionJavaScript\"", divAttrs);
+            assertContains("class=\"javascript\"", divAttrs);
+            assertContains("subtype=\"JavaScript\"", divAttrs);
+            jsDivs++;
+        }
+        if (jsDivs == 0) {
+            fail("failed to find js div in main document");
+        }
+
+        //now test js extraction
+        Metadata script = metadataList.get(1);
+        assertEquals("MACRO", script.get(TikaCoreProperties.EMBEDDED_RESOURCE_TYPE));
+        assertEquals("NAMES_TREE", script.get(PDF.ACTION_TRIGGER));
+        assertTrue(script.get(PDF.JS_NAME).startsWith("ADBE::FileAttachmentsCompatibility"));
+        assertContains("app.viewerVersion", script.get(TikaCoreProperties.TIKA_CONTENT));
+    }
+
+    /**
+     * Every named script of the XFA test file must be reported through
+     * {@code PDF.JS_NAME}, and nothing else.
+     */
+    @Test
+    public void testJavascriptInNamesTreeTwo() throws Exception {
+        Set<String> expected = Set.of(
+                "!ADBE::0200_VersChkCode_XFACheck",
+                "!ADBE::0100_VersChkVars",
+                "!ADBE::0100_VersChkStrings");
+
+        PDFParserConfig pdfConfig = new PDFParserConfig();
+        pdfConfig.setExtractActions(true);
+        ParseContext context = new ParseContext();
+        context.set(PDFParserConfig.class, pdfConfig);
+
+        List<Metadata> metadataList =
+                getRecursiveMetadata("testPDF_XFA_govdocs1_258578.pdf", context, true);
+
+        //collect the script names reported on any of the documents
+        Set<String> actual = new HashSet<>();
+        for (Metadata metadata : metadataList) {
+            String jsName = metadata.get(PDF.JS_NAME);
+            if (StringUtils.isBlank(jsName)) {
+                continue;
+            }
+            actual.add(jsName);
+        }
+        assertEquals(expected, actual);
+    }
 }

(tika) 01/01: TIKA-4465 -- extract javascript from name tree

Reply via email to