Author: mattmann
Date: Tue Sep 23 03:40:52 2014
New Revision: 1626932
URL: http://svn.apache.org/r1626932
Log:
Fix for TIKA-1421: Check if Tesseract is installed before attempting OCR.
Contributed by tpalsulich, mattmann.
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java?rev=1626932&r1=1626931&r2=1626932&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
Tue Sep 23 03:40:52 2014
@@ -45,6 +45,7 @@ import org.apache.tika.metadata.Metadata
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.external.ExternalParser;
import org.apache.tika.sax.XHTMLContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
@@ -132,6 +133,10 @@ public class TesseractOCRParser extends
throws IOException, SAXException, TikaException {
TesseractOCRConfig config = context.get(TesseractOCRConfig.class);
if(config == null) config = new TesseractOCRConfig();
+
+ String[] checkCmd = {config.getTesseractPath() + "tesseract"};
+ // If Tesseract is not on the path, do not try to run OCR.
+ if (!ExternalParser.check(checkCmd)) return;
XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
xhtml.startDocument();