Author: jukka Date: Mon May 5 23:29:19 2008 New Revision: 653686 URL: http://svn.apache.org/viewvc?rev=653686&view=rev Log: TIKA-92: Image metadata extraction - Added a simple ImageParser based on ImageIO - Currently only supports custom "width" and "height" metadata fields - Included a few test images
Added:
    incubator/tika/trunk/src/main/java/org/apache/tika/parser/image/
    incubator/tika/trunk/src/main/java/org/apache/tika/parser/image/ImageParser.java
    incubator/tika/trunk/src/test/java/org/apache/tika/parser/image/
    incubator/tika/trunk/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
    incubator/tika/trunk/src/test/resources/test-documents/testBMP.bmp   (with props)
    incubator/tika/trunk/src/test/resources/test-documents/testGIF.gif   (with props)
    incubator/tika/trunk/src/test/resources/test-documents/testJPEG.jpg   (with props)
    incubator/tika/trunk/src/test/resources/test-documents/testPNG.png   (with props)
    incubator/tika/trunk/src/test/resources/test-documents/testTIFF.tif   (with props)
Modified:
    incubator/tika/trunk/src/main/resources/tika-config.xml

Added: incubator/tika/trunk/src/main/java/org/apache/tika/parser/image/ImageParser.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/java/org/apache/tika/parser/image/ImageParser.java?rev=653686&view=auto
==============================================================================
--- incubator/tika/trunk/src/main/java/org/apache/tika/parser/image/ImageParser.java (added)
+++ incubator/tika/trunk/src/main/java/org/apache/tika/parser/image/ImageParser.java Mon May 5 23:29:19 2008
@@ -0,0 +1,114 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+
+import javax.imageio.ImageIO;
+import javax.imageio.ImageReader;
+import javax.imageio.stream.ImageInputStream;
+
+import org.apache.commons.io.input.CloseShieldInputStream;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.XHTMLContentHandler;
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser that reads basic image metadata through the Java ImageIO API.
+ * Only the pixel dimensions of the first image in the stream are
+ * extracted, and they are stored in the custom "height" and "width"
+ * metadata fields. No text content is produced.
+ */
+public class ImageParser implements Parser {
+
+    /**
+     * Records the height and width of the first image in the stream.
+     * Nothing is recorded when the metadata carries no content type or
+     * when ImageIO has no reader registered for that type.
+     *
+     * @param stream image data; it is not closed by this method
+     * @param metadata supplies the content type, receives the dimensions
+     * @throws IOException if the image data can not be read
+     * @throws TikaException declared by the signature, not thrown here
+     */
+    public void parse(InputStream stream, Metadata metadata)
+            throws IOException, TikaException {
+        String type = metadata.get(Metadata.CONTENT_TYPE);
+        if (type == null) {
+            return;
+        }
+
+        Iterator<ImageReader> iterator =
+            ImageIO.getImageReadersByMIMEType(type);
+        if (!iterator.hasNext()) {
+            return;
+        }
+
+        ImageReader reader = iterator.next();
+        try {
+            // The shield protects the caller's stream from being closed
+            ImageInputStream input = ImageIO.createImageInputStream(
+                    new CloseShieldInputStream(stream));
+            if (input == null) {
+                throw new IOException(
+                        "Unable to create an image input stream");
+            }
+            try {
+                reader.setInput(input);
+                metadata.set(
+                        "height", Integer.toString(reader.getHeight(0)));
+                metadata.set(
+                        "width", Integer.toString(reader.getWidth(0)));
+            } finally {
+                // Releases resources held by the image input stream
+                input.close();
+            }
+        } finally {
+            // Always release the reader, even when reading fails
+            reader.dispose();
+        }
+    }
+
+    /**
+     * Extracts the image dimensions into the metadata and then emits an
+     * empty XHTML document to the given handler.
+     *
+     * @param stream image data; it is not closed by this method
+     * @param handler receives the (empty) XHTML document events
+     * @param metadata supplies the content type, receives the dimensions
+     * @throws IOException if the image data can not be read
+     * @throws SAXException if the handler rejects a document event
+     * @throws TikaException if the metadata extraction fails
+     */
+    public void parse(
+            InputStream stream, ContentHandler handler, Metadata metadata)
+            throws IOException, SAXException, TikaException {
+        parse(stream, metadata);
+
+        // Only metadata is extracted, so the document body stays empty
+        XHTMLContentHandler xhtml =
+            new XHTMLContentHandler(handler, metadata);
+        xhtml.startDocument();
+        xhtml.endDocument();
+    }
+
+}

Modified: incubator/tika/trunk/src/main/resources/tika-config.xml
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/main/resources/tika-config.xml?rev=653686&r1=653685&r2=653686&view=diff
==============================================================================
--- incubator/tika/trunk/src/main/resources/tika-config.xml (original)
+++ incubator/tika/trunk/src/main/resources/tika-config.xml Mon May 5 23:29:19 2008
@@ -88,6 +88,18 @@
             <mime>application/x-vnd.oasis.opendocument.formula-template</mime>
         </parser>
 
+        <parser name="parse-image" class="org.apache.tika.parser.image.ImageParser">
+            <mime>image/bmp</mime>
+            <mime>image/gif</mime>
+            <mime>image/jpeg</mime>
+            <mime>image/png</mime>
+            <mime>image/tiff</mime>
+            <mime>image/vnd.wap.wbmp</mime>
+            <mime>image/x-icon</mime>
+            <mime>image/x-psd</mime>
+            <mime>image/x-xcf</mime>
+        </parser>
+
     </parsers>
 
 </properties>
\ No newline at end of file

Added: incubator/tika/trunk/src/test/java/org/apache/tika/parser/image/ImageParserTest.java
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/java/org/apache/tika/parser/image/ImageParserTest.java?rev=653686&view=auto
==============================================================================
---
incubator/tika/trunk/src/test/java/org/apache/tika/parser/image/ImageParserTest.java (added)
+++ incubator/tika/trunk/src/test/java/org/apache/tika/parser/image/ImageParserTest.java Mon May 5 23:29:19 2008
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.image;
+
+import java.io.InputStream;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.Parser;
+import org.xml.sax.helpers.DefaultHandler;
+
+import junit.framework.TestCase;
+
+/**
+ * Checks that {@link ImageParser} reports the dimensions of the sample
+ * images, each of which is expected to be 100 pixels wide and 75 high.
+ */
+public class ImageParserTest extends TestCase {
+
+    private final Parser parser = new ImageParser();
+
+    public void testBMP() throws Exception {
+        assertDimensions("/test-documents/testBMP.bmp", "image/bmp");
+    }
+
+    public void testGIF() throws Exception {
+        assertDimensions("/test-documents/testGIF.gif", "image/gif");
+    }
+
+    public void testJPEG() throws Exception {
+        assertDimensions("/test-documents/testJPEG.jpg", "image/jpeg");
+    }
+
+    public void testPNG() throws Exception {
+        assertDimensions("/test-documents/testPNG.png", "image/png");
+    }
+
+    // TODO: Add TIFF support
+    // public void testTIFF() throws Exception {
+    //     assertDimensions("/test-documents/testTIFF.tif", "image/tiff");
+    // }
+
+    /**
+     * Parses a test image and verifies the extracted dimensions.
+     *
+     * @param resource classpath location of the test image
+     * @param type content type to declare in the metadata
+     * @throws Exception if parsing fails
+     */
+    private void assertDimensions(String resource, String type)
+            throws Exception {
+        Metadata metadata = new Metadata();
+        metadata.set(Metadata.CONTENT_TYPE, type);
+
+        InputStream stream = getClass().getResourceAsStream(resource);
+        assertNotNull("Missing test resource: " + resource, stream);
+        try {
+            parser.parse(stream, new DefaultHandler(), metadata);
+        } finally {
+            stream.close();
+        }
+
+        assertEquals("75", metadata.get("height"));
+        assertEquals("100", metadata.get("width"));
+    }
+
+}

Added: incubator/tika/trunk/src/test/resources/test-documents/testBMP.bmp
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testBMP.bmp?rev=653686&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/tika/trunk/src/test/resources/test-documents/testBMP.bmp
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/tika/trunk/src/test/resources/test-documents/testGIF.gif
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testGIF.gif?rev=653686&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/tika/trunk/src/test/resources/test-documents/testGIF.gif
------------------------------------------------------------------------------
    svn:mime-type = application/octet-stream

Added: incubator/tika/trunk/src/test/resources/test-documents/testJPEG.jpg
URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testJPEG.jpg?rev=653686&view=auto
==============================================================================
Binary file - no diff available.

Propchange: incubator/tika/trunk/src/test/resources/test-documents/testJPEG.jpg ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: incubator/tika/trunk/src/test/resources/test-documents/testPNG.png URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testPNG.png?rev=653686&view=auto ============================================================================== Binary file - no diff available. Propchange: incubator/tika/trunk/src/test/resources/test-documents/testPNG.png ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream Added: incubator/tika/trunk/src/test/resources/test-documents/testTIFF.tif URL: http://svn.apache.org/viewvc/incubator/tika/trunk/src/test/resources/test-documents/testTIFF.tif?rev=653686&view=auto ============================================================================== Binary file - no diff available. Propchange: incubator/tika/trunk/src/test/resources/test-documents/testTIFF.tif ------------------------------------------------------------------------------ svn:mime-type = application/octet-stream