Author: nick
Date: Sun Nov 30 17:19:45 2014
New Revision: 1642570

URL: http://svn.apache.org/r1642570
Log:
Add a TODO for TIKA-1490

Modified:
    
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java

Modified: 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
URL: 
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java?rev=1642570&r1=1642569&r2=1642570&view=diff
==============================================================================
--- 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
 (original)
+++ 
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/microsoft/ExcelExtractor.java
 Sun Nov 30 17:19:45 2014
@@ -96,6 +96,7 @@ public class ExcelExtractor extends Abst
     private boolean listenForAllRecords = false;
     
     private static final String WORKBOOK_ENTRY = "Workbook";
+    private static final String BOOK_ENTRY = "Book";
 
     public ExcelExtractor(ParseContext context) {
         super(context);
@@ -143,8 +144,15 @@ public class ExcelExtractor extends Abst
             DirectoryNode root, XHTMLContentHandler xhtml,
             Locale locale) throws IOException, SAXException, TikaException {
         if (! root.hasEntry(WORKBOOK_ENTRY)) {
-           // Corrupt file / very old file, just skip
-           return;
+            if (root.hasEntry(BOOK_ENTRY)) {
+                // Excel 5 / Excel 95 file
+                // Records are in a different structure, so a
+                //  different parser is needed to process them
+                // TODO Call one, see TIKA-1490
+            } else {
+               // Corrupt file / very old file, just skip text extraction
+               return;
+            }
         }
        
         TikaHSSFListener listener = new TikaHSSFListener(xhtml, locale, this);
@@ -610,5 +618,4 @@ public class ExcelExtractor extends Abst
         }
 
     }
-
 }


Reply via email to