This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new 10baddc  TIKA-2374 and TIKA-2434 - roll back extracting inline images for pdfs in tika-app to just -z option
10baddc is described below

commit 10baddcc15501c196dccf956463e607d9973c403
Author: tballison <[email protected]>
AuthorDate: Thu Aug 17 14:11:57 2017 -0400

    TIKA-2374 and TIKA-2434 - roll back extracting inline images for pdfs in tika-app to just -z option
---
 .../src/main/java/org/apache/tika/cli/TikaCLI.java | 25 +++++++++++++---------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 402e62f..df68210 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -179,20 +179,24 @@ public class TikaCLI {
         return false;
     }
 
+    private void extractInlineImagesFromPDFs() {
+        if (configFilePath == null && context.get(PDFParserConfig.class) == null) {
+            PDFParserConfig pdfParserConfig = new PDFParserConfig();
+            pdfParserConfig.setExtractInlineImages(true);
+            String warn = "As a convenience, TikaCLI has turned on extraction of\n" +
+                    "inline images for the PDFParser (TIKA-2374).\n" +
+                    "Aside from the -z option, this is not the default behavior\n"+
+                    "in Tika generally or in tika-server.";
+            LOG.info(warn);
+            System.err.println(warn);
+            context.set(PDFParserConfig.class, pdfParserConfig);
+        }
+    }
+
     private class OutputType {
         public void process(
                 InputStream input, OutputStream output, Metadata metadata)
                 throws Exception {
-            if (configFilePath == null && context.get(PDFParserConfig.class) == null) {
-                PDFParserConfig pdfParserConfig = new PDFParserConfig();
-                pdfParserConfig.setExtractInlineImages(true);
-                String warn = "As a convenience, TikaCLI has turned on extraction of\n" +
-                        "inline images for the PDFParser (TIKA-2374).\n" +
-                        "This is not the default option in Tika generally or in tika-server.";
-                LOG.info(warn);
-                System.err.println(warn);
-                context.set(PDFParserConfig.class, pdfParserConfig);
-            }
             Parser p = parser;
             if (fork) {
                 p = new ForkParser(TikaCLI.class.getClassLoader(), p);
@@ -442,6 +446,7 @@ public class TikaCLI {
         } else if (arg.startsWith("--extract-dir=")) {
             extractDir = new File(arg.substring("--extract-dir=".length()));
         } else if (arg.equals("-z") || arg.equals("--extract")) {
+            extractInlineImagesFromPDFs();
             type = NO_OUTPUT;
             context.set(EmbeddedDocumentExtractor.class, new FileEmbeddedDocumentExtractor());
         } else if (arg.equals("-r") || arg.equals("--pretty-print")) {

-- 
To stop receiving notification emails like this one, please contact
['"[email protected]" <[email protected]>'].

Reply via email to