Author: jukka
Date: Mon May  5 23:29:19 2008
New Revision: 653686

URL: http://svn.apache.org/viewvc?rev=653686&view=rev
Log:
TIKA-92: Image metadata extraction
    - Added a simple ImageParser based on ImageIO
    - Currently only supports custom "width" and "height" metadata fields
    - Included a few test images

Added:
    incubator/tika/trunk/src/main/java/org/apache/tika/parser/image/
    incubator/tika/trunk/src/main/java/org/apache/tika/parser/image/ImageParser.java
    incubator/tika/trunk/src/test/java/org/apache/tika/parser/image/
    incubator/tika/trunk/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
    incubator/tika/trunk/src/test/resources/test-documents/testBMP.bmp   (with props)
    incubator/tika/trunk/src/test/resources/test-documents/testGIF.gif   (with props)
    incubator/tika/trunk/src/test/resources/test-documents/testJPEG.jpg   (with props)
    incubator/tika/trunk/src/test/resources/test-documents/testPNG.png   (with props)
    incubator/tika/trunk/src/test/resources/test-documents/testTIFF.tif   (with props)
Modified:
    incubator/tika/trunk/src/main/resources/tika-config.xml

Added: 
incubator/tika/trunk/src/main/java/org/apache/tika/parser/image/ImageParser.java
URL: 
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/image/ImageParser.java?rev=653686&view=auto
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/image/ImageParser.java (added)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/image/ImageParser.java Mon May  5 23:29:19 2008
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+
+import javax.imageio.ImageIO;
+import javax.imageio.ImageReader;
+import javax.imageio.stream.ImageInputStream;
+
+import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser that extracts basic image metadata using the ImageIO readers
+ * registered for the declared content type. Only the custom "height" and
+ * "width" metadata fields are populated.
+ */
+public class ImageParser implements Parser {
+
+    /**
+     * Reads the dimensions of the first image in the stream and stores
+     * them as the "height" and "width" metadata fields.
+     * <p>
+     * Nothing is read or set when the metadata carries no content type or
+     * when ImageIO has no reader for that type. The given stream is never
+     * closed by this method.
+     *
+     * @param stream image data; left open for the caller to close
+     * @param metadata supplies the content type, receives the dimensions
+     * @throws IOException if the image header cannot be read
+     * @throws TikaException if no ImageIO input stream can be created
+     */
+    public void parse(InputStream stream, Metadata metadata)
+            throws IOException, TikaException {
+        String type = metadata.get(Metadata.CONTENT_TYPE);
+        if (type == null) {
+            return;
+        }
+
+        Iterator<ImageReader> iterator =
+            ImageIO.getImageReadersByMIMEType(type);
+        if (!iterator.hasNext()) {
+            return;
+        }
+
+        ImageReader reader = iterator.next();
+        try {
+            // Shield the caller's stream: closing the ImageIO wrapper
+            // below must not close the underlying document stream.
+            ImageInputStream input = ImageIO.createImageInputStream(
+                    new CloseShieldInputStream(stream));
+            if (input == null) {
+                throw new TikaException(
+                        "Unable to create an ImageIO input stream");
+            }
+            try {
+                reader.setInput(input);
+                metadata.set("height", Integer.toString(reader.getHeight(0)));
+                metadata.set("width", Integer.toString(reader.getWidth(0)));
+            } finally {
+                // Release whatever the ImageIO stream holds (e.g. a cache).
+                input.close();
+            }
+        } finally {
+            // Always release reader resources, even on read failures.
+            reader.dispose();
+        }
+    }
+
+    /**
+     * Extracts the image metadata and then starts and immediately ends an
+     * XHTML document on the given handler.
+     *
+     * @param stream image data; left open for the caller to close
+     * @param handler receives the XHTML document events
+     * @param metadata supplies the content type, receives the dimensions
+     * @throws IOException if the image header cannot be read
+     * @throws SAXException if the handler fails to process an event
+     * @throws TikaException if no ImageIO input stream can be created
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler, Metadata metadata)
+            throws IOException, SAXException, TikaException {
+        parse(stream, metadata);
+        XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        xhtml.endDocument();
+    }
+
+}

Modified: incubator/tika/trunk/src/main/resources/tika-config.xml
URL: 
http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/resources/tika-config.xml?rev=653686&r1=653685&r2=653686&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/resources/tika-config.xml (original)
+++ incubator/tika/trunk/src/main/resources/tika-config.xml Mon May  5 23:29:19 2008
@@ -88,6 +88,18 @@
                 
<mime>application/x-vnd.oasis.opendocument.formula-template</mime>
         </parser>
 
+        <parser name="parse-image" class="org.apache.tika.parser.image.ImageParser">
+                <mime>image/bmp</mime>
+                <mime>image/gif</mime>
+                <mime>image/jpeg</mime>
+                <mime>image/png</mime>
+                <mime>image/tiff</mime>
+                <mime>image/vnd.wap.wbmp</mime>
+                <mime>image/x-icon</mime>
+                <mime>image/x-psd</mime>
+                <mime>image/x-xcf</mime>
+        </parser>
+
     </parsers>
 
 </properties>
\ No newline at end of file

Added: 
incubator/tika/trunk/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
URL: 
http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/parser/image/ImageParserTest.java?rev=653686&view=auto
==============================================================================
--- incubator/tika/trunk/src/test/java/org/apache/tika/parser/image/ImageParserTest.java (added)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/parser/image/ImageParserTest.java Mon May  5 23:29:19 2008
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import java.io.InputStream;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.Parser;
+import org.xml.sax.helpers.DefaultHandler;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests that {@link ImageParser} reports the dimensions of the bundled
+ * test images, each of which is expected to yield a width of 100 and a
+ * height of 75.
+ */
+public class ImageParserTest extends TestCase {
+
+    /** Height every bundled test image is expected to report. */
+    private static final String EXPECTED_HEIGHT = "75";
+
+    /** Width every bundled test image is expected to report. */
+    private static final String EXPECTED_WIDTH = "100";
+
+    private final Parser parser = new ImageParser();
+
+    public void testBMP() throws Exception {
+        assertDimensions("image/bmp", "/test-documents/testBMP.bmp");
+    }
+
+    public void testGIF() throws Exception {
+        assertDimensions("image/gif", "/test-documents/testGIF.gif");
+    }
+
+    public void testJPEG() throws Exception {
+        assertDimensions("image/jpeg", "/test-documents/testJPEG.jpg");
+    }
+
+    public void testPNG() throws Exception {
+        assertDimensions("image/png", "/test-documents/testPNG.png");
+    }
+
+    // TODO: Add TIFF support
+    //    public void testTIFF() throws Exception {
+    //        assertDimensions("image/tiff", "/test-documents/testTIFF.tif");
+    //    }
+
+    /**
+     * Parses the given test document as the given content type and checks
+     * the extracted "height" and "width" metadata.
+     *
+     * @param type content type to declare in the metadata
+     * @param resource classpath location of the test document
+     * @throws Exception if the document cannot be parsed
+     */
+    private void assertDimensions(String type, String resource)
+            throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, type);
+
+        InputStream stream = getClass().getResourceAsStream(resource);
+        // Fail with a clear message instead of passing null to the parser.
+        assertNotNull("Test document not found: " + resource, stream);
+        try {
+            parser.parse(stream, new DefaultHandler(), metadata);
+        } finally {
+            stream.close();
+        }
+
+        assertEquals(EXPECTED_HEIGHT, metadata.get("height"));
+        assertEquals(EXPECTED_WIDTH, metadata.get("width"));
+    }
+
+}

Added: incubator/tika/trunk/src/test/resources/test-documents/testBMP.bmp
URL: 
http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testBMP.bmp?rev=653686&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/tika/trunk/src/test/resources/test-documents/testBMP.bmp
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/tika/trunk/src/test/resources/test-documents/testGIF.gif
URL: 
http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testGIF.gif?rev=653686&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/tika/trunk/src/test/resources/test-documents/testGIF.gif
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/tika/trunk/src/test/resources/test-documents/testJPEG.jpg
URL: 
http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testJPEG.jpg?rev=653686&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/tika/trunk/src/test/resources/test-documents/testJPEG.jpg
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/tika/trunk/src/test/resources/test-documents/testPNG.png
URL: 
http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testPNG.png?rev=653686&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/tika/trunk/src/test/resources/test-documents/testPNG.png
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/tika/trunk/src/test/resources/test-documents/testTIFF.tif
URL: 
http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testTIFF.tif?rev=653686&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/tika/trunk/src/test/resources/test-documents/testTIFF.tif
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream


Reply via email to