Author: mattmann
Date: Wed Jun 25 15:01:08 2014
New Revision: 1605434

URL: http://svn.apache.org/r1605434
Log:
- apply patch for TIKA-1274 ENVI Header parser contributed by Ann Burgess

Added:
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/
    tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/envi/
    tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java
    tika/trunk/tika-parsers/src/test/resources/test-documents/envi_test_header.hdr

Added: tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java?rev=1605434&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java (added)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/envi/EnviHeaderParser.java Wed Jun 25 15:01:08 2014
@@ -0,0 +1,120 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * 
+ */
+package org.apache.tika.parser.envi;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Collections;
+import java.util.Set;
+import java.nio.charset.Charset;
+
+import org.apache.tika.detect.AutoDetectReader;
+import org.apache.tika.io.CloseShieldInputStream;
+import org.apache.tika.io.IOUtils;
+import org.apache.tika.exception.TikaException;
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.AbstractParser;
+import org.apache.tika.sax.XHTMLContentHandler;
+
+import org.xml.sax.ContentHandler;
+import org.xml.sax.SAXException;
+
+/**
+ * Parser for ENVI header files, registered for the
+ * {@code application/envi.hdr} media type.
+ * <p>
+ * The header is handled as plain text: its character encoding is detected
+ * automatically and the decoded text is emitted as a single XHTML paragraph.
+ */
+public class EnviHeaderParser extends AbstractParser {
+
+       private static final long serialVersionUID = -1479368523072408091L;
+
+       /** MIME type that {@code parse} stores as the content type. */
+       public static final String ENVI_MIME_TYPE = "application/envi.hdr";
+
+       private static final Set<MediaType> SUPPORTED_TYPES = Collections
+                       .singleton(MediaType.application("envi.hdr"));
+
+       /**
+        * Returns the media types handled by this parser.
+        *
+        * @param context parse context (not used)
+        * @return the singleton set holding the ENVI header media type
+        */
+       public Set<MediaType> getSupportedTypes(ParseContext context) {
+               return SUPPORTED_TYPES;
+       }
+
+       /**
+        * Records the content type and detected encoding in {@code metadata} and
+        * writes the header text to {@code handler} inside one {@code <p>}
+        * element.
+        *
+        * @param stream   header content; read through a CloseShieldInputStream,
+        *                 presumably so the caller's stream is left open
+        * @param handler  receiver of the generated XHTML SAX events
+        * @param metadata updated with the content type and content encoding
+        * @param context  parse context (not used)
+        * @throws IOException   if reading the stream fails
+        * @throws SAXException  if the handler rejects an event
+        * @throws TikaException declared by the Parser contract; presumably raised
+        *                       when encoding detection fails (confirm against
+        *                       AutoDetectReader)
+        */
+       public void parse(InputStream stream, ContentHandler handler,
+                       Metadata metadata, ParseContext context) throws IOException,
+                       SAXException, TikaException {
+
+               // Only outputting the MIME type as metadata
+               metadata.set(Metadata.CONTENT_TYPE, ENVI_MIME_TYPE);
+
+               // The following code was taken from the TXTParser
+               // Automatically detect the character encoding
+               AutoDetectReader reader = new AutoDetectReader(
+                               new CloseShieldInputStream(stream), metadata);
+
+               try {
+                       Charset charset = reader.getCharset();
+                       // deprecated, see TIKA-431
+                       metadata.set(Metadata.CONTENT_ENCODING, charset.name());
+
+                       XHTMLContentHandler xhtml = new XHTMLContentHandler(handler,
+                                       metadata);
+
+                       xhtml.startDocument();
+
+                       // text contents of the xhtml
+                       xhtml.startElement("p");
+                       char[] buffer = new char[4096];
+                       int n = reader.read(buffer);
+                       while (n != -1) {
+                               xhtml.characters(buffer, 0, n);
+                               n = reader.read(buffer);
+                       }
+                       xhtml.endElement("p");
+
+                       xhtml.endDocument();
+               } finally {
+                       reader.close();
+               }
+
+       }
+}

Added: tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java?rev=1605434&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java (added)
+++ tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/envi/EnviHeaderParserTest.java Wed Jun 25 15:01:08 2014
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.tika.parser.envi;
+
+//Junit imports
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import org.junit.Test;
+
+import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.ParseContext;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.sax.BodyContentHandler;
+import org.xml.sax.ContentHandler;
+import java.io.InputStream;
+
+/**
+ * Test cases to exercise the {@link EnviHeaderParser}.
+ */
+public class EnviHeaderParserTest {
+       /**
+        * Parses the bundled ENVI header fixture and checks the reported content
+        * type and that selected header fields appear in the extracted text.
+        */
+       @Test
+       public void testParseGlobalMetadata() throws Exception {
+               // NOTE(review): silently skipped on Java 1.5; the reason is not
+               // recorded here - confirm whether the guard is still needed
+               if (System.getProperty("java.version").startsWith("1.5")) {
+                       return;
+               }
+
+               Parser parser = new EnviHeaderParser();
+               ContentHandler handler = new BodyContentHandler();
+               Metadata metadata = new Metadata();
+
+               InputStream stream = EnviHeaderParser.class
+                               .getResourceAsStream("/test-documents/envi_test_header.hdr");
+               assertNotNull("Test ENVI file not found", stream);
+               try {
+                       parser.parse(stream, handler, metadata, new ParseContext());
+               } finally {
+                       stream.close();
+               }
+
+               // The parser reports the ENVI MIME type as the content type
+               assertTrue(EnviHeaderParser.ENVI_MIME_TYPE.equals(metadata
+                               .get(Metadata.CONTENT_TYPE)));
+
+               // Check content of test file
+               String content = handler.toString();
+               assertTrue(content.contains("ENVI"));
+               assertTrue(content.contains("samples = 2400"));
+               assertTrue(content.contains("lines   = 2400"));
+               assertTrue(content.contains("bands   = 7"));
+               assertTrue(content.contains("header offset = 0"));
+               assertTrue(content.contains("file type = ENVI Standard"));
+               assertTrue(content
+                               .contains("projection info = {16, 6371007.2, 0.000000, 0.0, 0.0, Sinusoidal, units=Meters}"));
+       }
+}

Added: tika/trunk/tika-parsers/src/test/resources/test-documents/envi_test_header.hdr
URL: http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/resources/test-documents/envi_test_header.hdr?rev=1605434&view=auto
==============================================================================
--- tika/trunk/tika-parsers/src/test/resources/test-documents/envi_test_header.hdr (added)
+++ tika/trunk/tika-parsers/src/test/resources/test-documents/envi_test_header.hdr Wed Jun 25 15:01:08 2014
@@ -0,0 +1,16 @@
+ENVI
+description = {
+  GEO-TIFF File Imported into ENVI [Fri May 25 14:06:23 2012]}
+samples = 2400
+lines   = 2400
+bands   = 7
+header offset = 0
+file type = ENVI Standard
+data type = 2
+interleave = bip
+sensor type = Unknown
+byte order = 0
+map info = {Sinusoidal, 1.5000, 1.5000, -10007091.3643, 5559289.2856, 4.6331271653e+02, 4.6331271653e+02, , units=Meters}
+projection info = {16, 6371007.2, 0.000000, 0.0, 0.0, Sinusoidal, units=Meters}
+coordinate system string = {PROJCS["Sinusoidal",GEOGCS["GCS_ELLIPSE_BASED_1",DATUM["D_ELLIPSE_BASED_1",SPHEROID["S_ELLIPSE_BASED_1",6371007.181,0.0]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]],PROJECTION["Sinusoidal"],PARAMETER["False_Easting",0.0],PARAMETER["False_Northing",0.0],PARAMETER["Central_Meridian",0.0],UNIT["Meter",1.0]]}
+wavelength units = Unknown


Reply via email to