Author: mattmann
Date: Wed Jun 25 15:01:08 2014
New Revision: 1605434
URL: http://svn.apache.org/r1605434
Log:
- apply patch for TIKA-1274 ENVI Header parser contributed by Ann Burgess
Added:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/envi/
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java
tika/trunk/tika-parsers/src/test/resources/test-documents/envi_test_header.hdr
Added:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java?rev=1605434&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java (added)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java Wed Jun 25 15:01:08 2014
@@ -0,0 +1,115 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ *
+ */
+package org.apache.tika.parser.envi;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+import java.nio.charset.Charset;
+
+import org.apache.tika.detect.AutoDetectReader;
+import org.apache.tika.io.CloseShieldInputStream;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.sax.XHTMLContentHandler;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser for ENVI header ({@code .hdr}) files.
+ * <p>
+ * The header is treated as plain text: its content is emitted as a single
+ * XHTML paragraph, and the character encoding is detected automatically.
+ */
+public class EnviHeaderParser extends AbstractParser {
+
+    private static final long serialVersionUID = -1479368523072408091L;
+
+    /** MIME type reported for ENVI header files. */
+    public static final String ENVI_MIME_TYPE = "application/envi.hdr";
+
+    private static final Set<MediaType> SUPPORTED_TYPES = Collections
+            .singleton(MediaType.application("envi.hdr"));
+
+    /**
+     * Returns the media types handled by this parser.
+     *
+     * @param context the parse context (not consulted)
+     * @return a singleton set holding the {@code application/envi.hdr} type
+     */
+    public Set<MediaType> getSupportedTypes(ParseContext context) {
+        return SUPPORTED_TYPES;
+    }
+
+    /**
+     * Writes the text of an ENVI header to the given handler as XHTML and
+     * records the content type and detected encoding in the metadata.
+     *
+     * @param stream   the header content; wrapped so it is not closed here
+     * @param handler  receives the XHTML SAX events
+     * @param metadata updated with the content type and content encoding
+     * @param context  the parse context (not consulted)
+     * @throws IOException   if the stream cannot be read
+     * @throws SAXException  if the handler fails on a SAX event
+     * @throws TikaException if setting up the auto-detecting reader fails
+     */
+    public void parse(InputStream stream, ContentHandler handler,
+            Metadata metadata, ParseContext context)
+            throws IOException, SAXException, TikaException {
+
+        // Report the ENVI MIME type as the content type
+        metadata.set(Metadata.CONTENT_TYPE, ENVI_MIME_TYPE);
+
+        // The following code was taken from the TXTParser.
+        // Automatically detect the character encoding; the close shield
+        // keeps reader.close() below from closing the caller's stream.
+        AutoDetectReader reader = new AutoDetectReader(
+                new CloseShieldInputStream(stream), metadata);
+
+        try {
+            Charset charset = reader.getCharset();
+            // deprecated, see TIKA-431
+            metadata.set(Metadata.CONTENT_ENCODING, charset.name());
+
+            XHTMLContentHandler xhtml = new XHTMLContentHandler(handler,
+                    metadata);
+
+            xhtml.startDocument();
+
+            // text contents of the xhtml
+            xhtml.startElement("p");
+            char[] buffer = new char[4096];
+            int n = reader.read(buffer);
+            while (n != -1) {
+                xhtml.characters(buffer, 0, n);
+                n = reader.read(buffer);
+            }
+            xhtml.endElement("p");
+
+            xhtml.endDocument();
+        } finally {
+            reader.close();
+        }
+    }
+}
Added:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java?rev=1605434&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java (added)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java Wed Jun 25 15:01:08 2014
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.envi;
+
+//Junit imports
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import org.junit.Test;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+import java.io.InputStream;
+
+/**
+ * Test cases to exercise the {@link EnviHeaderParser}.
+ */
+public class EnviHeaderParserTest {
+
+    /**
+     * Parses the sample ENVI header and checks that its key/value lines
+     * appear in the extracted text.
+     */
+    @Test
+    public void testParseGlobalMetadata() throws Exception {
+        // NOTE(review): the test is skipped on Java 1.5; the reason is not stated here
+        if (System.getProperty("java.version").startsWith("1.5")) {
+            return;
+        }
+
+        Parser parser = new EnviHeaderParser();
+        ContentHandler handler = new BodyContentHandler();
+        Metadata metadata = new Metadata();
+
+        InputStream stream = EnviHeaderParser.class
+                .getResourceAsStream("/test-documents/envi_test_header.hdr");
+        assertNotNull("Test ENVI file not found", stream);
+        try {
+            parser.parse(stream, handler, metadata, new ParseContext());
+        } finally {
+            stream.close();
+        }
+
+        // Check content of test file
+        String content = handler.toString();
+        assertTrue(content.contains("ENVI"));
+        assertTrue(content.contains("samples = 2400"));
+        assertTrue(content.contains("lines = 2400"));
+        assertTrue(content.contains("bands = 7"));
+        assertTrue(content.contains("header offset = 0"));
+        assertTrue(content.contains("file type = ENVI Standard"));
+        assertTrue(content
+                .contains("projection info = {16, 6371007.2, 0.000000, 0.0, 0.0, Sinusoidal, units=Meters}"));
+    }
+}
Added:
tika/trunk/tika-parsers/src/test/resources/test-documents/envi_test_header.hdr
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/envi_test_header.hdr?rev=1605434&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/test-documents/envi_test_header.hdr (added)
+++ tika/trunk/tika-parsers/src/test/resources/test-documents/envi_test_header.hdr Wed Jun 25 15:01:08 2014
@@ -0,0 +1,16 @@
+ENVI
+description = {
+ GEO-TIFF File Imported into ENVI [Fri May 25 14:06:23 2012]}
+samples = 2400
+lines = 2400
+bands = 7
+header offset = 0
+file type = ENVI Standard
+data type = 2
+interleave = bip
+sensor type = Unknown
+byte order = 0
+map info = {Sinusoidal, 1.5000, 1.5000, -10007091.3643, 5559289.2856, 4.6331271653e+02, 4.6331271653e+02, , units=Meters}
+projection info = {16, 6371007.2, 0.000000, 0.0, 0.0, Sinusoidal, units=Meters}
+coordinate system string = {PROJCS["Sinusoidal",GEOGCS["GCS_ELLIPSE_BASED_1",DATUM["D_ELLIPSE_BASED_1",SPHEROID["S_ELLIPSE_BASED_1",6371007.181,0.0]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Sinusoidal"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],UNIT["Meter",1.0]]}
+wavelength units = Unknown