This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new 10baddc TIKA-2374 and TIKA-2434 - roll back extracting inline images
for pdfs in tika-app to just -z option
10baddc is described below
commit 10baddcc15501c196dccf956463e607d9973c403
Author: tballison <[email protected]>
AuthorDate: Thu Aug 17 14:11:57 2017 -0400
TIKA-2374 and TIKA-2434 - roll back extracting inline images for pdfs in
tika-app to just -z option
---
.../src/main/java/org/apache/tika/cli/TikaCLI.java | 25 +++++++++++++---------
1 file changed, 15 insertions(+), 10 deletions(-)
diff --git a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
index 402e62f..df68210 100644
--- a/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
+++ b/tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
@@ -179,20 +179,24 @@ public class TikaCLI {
return false;
}
+ private void extractInlineImagesFromPDFs() {
+ if (configFilePath == null && context.get(PDFParserConfig.class) == null) {
+ PDFParserConfig pdfParserConfig = new PDFParserConfig();
+ pdfParserConfig.setExtractInlineImages(true);
+ String warn = "As a convenience, TikaCLI has turned on extraction of\n" +
+ "inline images for the PDFParser (TIKA-2374).\n" +
+ "Aside from the -z option, this is not the default behavior\n"+
+ "in Tika generally or in tika-server.";
+ LOG.info(warn);
+ System.err.println(warn);
+ context.set(PDFParserConfig.class, pdfParserConfig);
+ }
+ }
+
private class OutputType {
public void process(
InputStream input, OutputStream output, Metadata metadata)
throws Exception {
- if (configFilePath == null && context.get(PDFParserConfig.class) == null) {
- PDFParserConfig pdfParserConfig = new PDFParserConfig();
- pdfParserConfig.setExtractInlineImages(true);
- String warn = "As a convenience, TikaCLI has turned on extraction of\n" +
- "inline images for the PDFParser (TIKA-2374).\n" +
- "This is not the default option in Tika generally or in tika-server.";
- LOG.info(warn);
- System.err.println(warn);
- context.set(PDFParserConfig.class, pdfParserConfig);
- }
Parser p = parser;
if (fork) {
p = new ForkParser(TikaCLI.class.getClassLoader(), p);
@@ -442,6 +446,7 @@ public class TikaCLI {
} else if (arg.startsWith("--extract-dir=")) {
extractDir = new File(arg.substring("--extract-dir=".length()));
} else if (arg.equals("-z") || arg.equals("--extract")) {
+ extractInlineImagesFromPDFs();
type = NO_OUTPUT;
context.set(EmbeddedDocumentExtractor.class, new FileEmbeddedDocumentExtractor());
} else if (arg.equals("-r") || arg.equals("--pretty-print")) {
--
To stop receiving notification emails like this one, please contact
['"[email protected]" <[email protected]>'].