Author: mattmann
Date: Tue Sep 23 03:40:52 2014
New Revision: 1626932

URL: http://svn.apache.org/r1626932
Log:
Fix for TIKA-1421: Check if Tesseract is installed before attempting OCR.
Contributed by tpalsulich, mattmann.

Modified:
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java?rev=1626932&r1=1626931&r2=1626932&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/ocr/TesseractOCRParser.java
 Tue Sep 23 03:40:52 2014
@@ -45,6 +45,7 @@ import org.apache.tika.metadata.Metadata
 import org.apache.tika.mime.MediaType;
 import org.apache.tika.parser.AbstractParser;
 import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.external.ExternalParser;
 import org.apache.tika.sax.XHTMLContentHandler;
 import org.xml.sax.ContentHandler;
 import org.xml.sax.SAXException;
@@ -132,6 +133,10 @@ public class TesseractOCRParser extends 
             throws IOException, SAXException, TikaException {
        TesseractOCRConfig config = context.get(TesseractOCRConfig.class);
        if(config == null) config = new TesseractOCRConfig();
+
+        String[] checkCmd = {config.getTesseractPath() + "tesseract"};
+        // If Tesseract is not on the path, do not try to run OCR.
+        if (!ExternalParser.check(checkCmd)) return;
        
        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
        xhtml.startDocument();


Reply via email to