Author: tpalsulich
Date: Wed Dec 24 08:16:45 2014
New Revision: 1647743
URL: http://svn.apache.org/r1647743
Log:
Pure whitespace change. Reformat the GDALParser and its test.
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java?rev=1647743&r1=1647742&r2=1647743&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
(original)
+++
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/gdal/GDALParser.java
Wed Dec 24 08:16:45 2014
@@ -31,7 +31,6 @@ import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
-
//Tika imports
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
@@ -42,6 +41,7 @@ import org.apache.tika.parser.AbstractPa
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.external.ExternalParser;
import org.apache.tika.sax.XHTMLContentHandler;
+
import static org.apache.tika.parser.external.ExternalParser.INPUT_FILE_TOKEN;
//SAX imports
@@ -49,376 +49,373 @@ import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
/**
- *
* Wraps execution of the <a href="http://gdal.org/">Geospatial Data Abstraction
* Library (GDAL)</a> <code>gdalinfo</code> tool used to extract geospatial
* information out of hundreds of geo file formats.
- *
+ * <p/>
* The parser requires the installation of GDAL and for <code>gdalinfo</code>
to
* be located on the path.
- *
+ * <p/>
* Basic information (Size, Coordinate System, Bounding Box, Driver, and
* resource info) are extracted as metadata, and the remaining metadata
patterns
* are extracted and added.
- *
+ * <p/>
* The output of the command is available from the provided
* {@link ContentHandler} in the
* {@link #parse(InputStream, ContentHandler, Metadata, ParseContext)} method.
- *
*/
public class GDALParser extends AbstractParser {
- private static final long serialVersionUID = -3869130527323941401L;
+ private static final long serialVersionUID = -3869130527323941401L;
- private String command;
+ private String command;
- public GDALParser() {
- setCommand("gdalinfo ${INPUT}");
- }
-
- public void setCommand(String command) {
- this.command = command;
- }
-
- public String getCommand() {
- return this.command;
- }
-
- public String processCommand(InputStream stream) {
- TikaInputStream tis = (TikaInputStream) stream;
- String pCommand = this.command;
- try {
- if (this.command.indexOf(INPUT_FILE_TOKEN) != -1) {
- pCommand =
this.command.replace(INPUT_FILE_TOKEN, tis.getFile()
- .getPath());
- }
- } catch (Exception e) {
- e.printStackTrace();
- }
-
- return pCommand;
- }
-
- @Override
- public Set<MediaType> getSupportedTypes(ParseContext context) {
- Set<MediaType> types = new HashSet<MediaType>();
- types.add(MediaType.application("x-netcdf"));
- types.add(MediaType.application("vrt"));
- types.add(MediaType.image("geotiff"));
- types.add(MediaType.image("ntif"));
- types.add(MediaType.application("x-rpf-toc"));
- types.add(MediaType.application("x-ecrg-toc"));
- types.add(MediaType.image("hfa"));
- types.add(MediaType.image("sar-ceos"));
- types.add(MediaType.image("ceos"));
- types.add(MediaType.application("jaxa-pal-sar"));
- types.add(MediaType.application("gff"));
- types.add(MediaType.application("elas"));
- types.add(MediaType.application("aig"));
- types.add(MediaType.application("aaigrid"));
- types.add(MediaType.application("grass-ascii-grid"));
- types.add(MediaType.application("sdts-raster"));
- types.add(MediaType.application("dted"));
- types.add(MediaType.image("png"));
- types.add(MediaType.image("jpeg"));
- types.add(MediaType.image("raster"));
- types.add(MediaType.application("jdem"));
- types.add(MediaType.image("gif"));
- types.add(MediaType.image("big-gif"));
- types.add(MediaType.image("envisat"));
- types.add(MediaType.image("fits"));
- types.add(MediaType.application("fits"));
- types.add(MediaType.image("bsb"));
- types.add(MediaType.application("xpm"));
- types.add(MediaType.image("bmp"));
- types.add(MediaType.image("x-dimap"));
- types.add(MediaType.image("x-airsar"));
- types.add(MediaType.application("x-rs2"));
- types.add(MediaType.application("x-pcidsk"));
- types.add(MediaType.application("pcisdk"));
- types.add(MediaType.image("x-pcraster"));
- types.add(MediaType.image("ilwis"));
- types.add(MediaType.image("sgi"));
- types.add(MediaType.application("x-srtmhgt"));
- types.add(MediaType.application("leveller"));
- types.add(MediaType.application("terragen"));
- types.add(MediaType.application("x-gmt"));
- types.add(MediaType.application("x-isis3"));
- types.add(MediaType.application("x-isis2"));
- types.add(MediaType.application("x-pds"));
- types.add(MediaType.application("x-til"));
- types.add(MediaType.application("x-ers"));
- types.add(MediaType.application("x-l1b"));
- types.add(MediaType.image("fit"));
- types.add(MediaType.application("x-grib"));
- types.add(MediaType.image("jp2"));
- types.add(MediaType.application("x-rmf"));
- types.add(MediaType.application("x-wcs"));
- types.add(MediaType.application("x-wms"));
- types.add(MediaType.application("x-msgn"));
- types.add(MediaType.application("x-wms"));
- types.add(MediaType.application("x-wms"));
- types.add(MediaType.application("x-rst"));
- types.add(MediaType.application("x-ingr"));
- types.add(MediaType.application("x-gsag"));
- types.add(MediaType.application("x-gsbg"));
- types.add(MediaType.application("x-gs7bg"));
- types.add(MediaType.application("x-cosar"));
- types.add(MediaType.application("x-tsx"));
- types.add(MediaType.application("x-coasp"));
- types.add(MediaType.application("x-r"));
- types.add(MediaType.application("x-map"));
- types.add(MediaType.application("x-pnm"));
- types.add(MediaType.application("x-doq1"));
- types.add(MediaType.application("x-doq2"));
- types.add(MediaType.application("x-envi"));
- types.add(MediaType.application("x-envi-hdr"));
- types.add(MediaType.application("x-generic-bin"));
- types.add(MediaType.application("x-p-aux"));
- types.add(MediaType.image("x-mff"));
- types.add(MediaType.image("x-mff2"));
- types.add(MediaType.image("x-fujibas"));
- types.add(MediaType.application("x-gsc"));
- types.add(MediaType.application("x-fast"));
- types.add(MediaType.application("x-bt"));
- types.add(MediaType.application("x-lan"));
- types.add(MediaType.application("x-cpg"));
- types.add(MediaType.image("ida"));
- types.add(MediaType.application("x-ndf"));
- types.add(MediaType.image("eir"));
- types.add(MediaType.application("x-dipex"));
- types.add(MediaType.application("x-lcp"));
- types.add(MediaType.application("x-gtx"));
- types.add(MediaType.application("x-los-las"));
- types.add(MediaType.application("x-ntv2"));
- types.add(MediaType.application("x-ctable2"));
- types.add(MediaType.application("x-ace2"));
- types.add(MediaType.application("x-snodas"));
- types.add(MediaType.application("x-kro"));
- types.add(MediaType.image("arg"));
- types.add(MediaType.application("x-rik"));
- types.add(MediaType.application("x-usgs-dem"));
- types.add(MediaType.application("x-gxf"));
- types.add(MediaType.application("x-dods"));
- types.add(MediaType.application("x-http"));
- types.add(MediaType.application("x-bag"));
- types.add(MediaType.application("x-hdf"));
- types.add(MediaType.image("x-hdf5-image"));
- types.add(MediaType.application("x-nwt-grd"));
- types.add(MediaType.application("x-nwt-grc"));
- types.add(MediaType.image("adrg"));
- types.add(MediaType.image("x-srp"));
- types.add(MediaType.application("x-blx"));
- types.add(MediaType.application("x-rasterlite"));
- types.add(MediaType.application("x-epsilon"));
- types.add(MediaType.application("x-sdat"));
- types.add(MediaType.application("x-kml"));
- types.add(MediaType.application("x-xyz"));
- types.add(MediaType.application("x-geo-pdf"));
- types.add(MediaType.image("x-ozi"));
- types.add(MediaType.application("x-ctg"));
- types.add(MediaType.application("x-e00-grid"));
- types.add(MediaType.application("x-zmap"));
- types.add(MediaType.application("x-webp"));
- types.add(MediaType.application("x-ngs-geoid"));
- types.add(MediaType.application("x-mbtiles"));
- types.add(MediaType.application("x-ppi"));
- types.add(MediaType.application("x-cappi"));
- return types;
- }
-
- @Override
- public void parse(InputStream stream, ContentHandler handler,
- Metadata metadata, ParseContext context) throws
IOException,
- SAXException, TikaException {
-
- if (!ExternalParser.check("gdalinfo")){
- return;
- }
-
- // first set up and run GDAL
- // process the command
- TemporaryResources tmp = new TemporaryResources();
- TikaInputStream tis = TikaInputStream.get(stream, tmp);
-
- String runCommand = processCommand(tis);
- String output = execCommand(new String[] { runCommand });
-
- // now extract the actual metadata params
- // from the GDAL output in the content stream
- // to do this, we need to literally process the output
- // from the invoked command b/c we can't read metadata and
- // output text from the handler in ExternalParser
- // at the same time, so for now, we can't use the
- // ExternalParser to do this and I've had to bring some of
- // that functionality directly into this class
- // TODO: investigate a way to do both using ExternalParser
-
- extractMetFromOutput(output, metadata);
- applyPatternsToOutput(output, metadata, getPatterns());
-
- // make the content handler and provide output there
- // now that we have metadata
- processOutput(handler, metadata, output);
- }
-
- private Map<Pattern, String> getPatterns() {
- Map<Pattern, String> patterns = new HashMap<Pattern, String>();
- this.addPatternWithColon("Driver", patterns);
- this.addPatternWithColon("Files", patterns);
- this.addPatternWithIs("Size", patterns);
- this.addPatternWithIs("Coordinate System", patterns);
- this.addBoundingBoxPattern("Upper Left", patterns);
- this.addBoundingBoxPattern("Lower Left", patterns);
- this.addBoundingBoxPattern("Upper Right", patterns);
- this.addBoundingBoxPattern("Lower Right", patterns);
- return patterns;
- }
-
- private void addPatternWithColon(String name, Map<Pattern, String>
patterns) {
- patterns.put(
- Pattern.compile(name + "\\:\\s*([A-Za-z0-9/
_\\-\\.]+)\\s*"),
- name);
- }
-
- private void addPatternWithIs(String name, Map<Pattern, String>
patterns) {
- patterns.put(Pattern.compile(name + " is
([A-Za-z0-9\\.,\\s`']+)"),
- name);
- }
-
- private void addBoundingBoxPattern(String name,
- Map<Pattern, String> patterns) {
- patterns.put(
- Pattern.compile(name
- +
"\\s*\\(\\s*([0-9]+\\.[0-9]+\\s*,\\s*[0-9]+\\.[0-9]+\\s*)\\)\\s*"),
- name);
- }
-
- private void extractMetFromOutput(String output, Metadata met) {
- Scanner scanner = new Scanner(output);
- String currentKey = null;
- String[] headings = {"Subdatasets", "Corner Coordinates"};
- StringBuilder metVal = new StringBuilder();
- while (scanner.hasNextLine()) {
- String line = scanner.nextLine();
- if (line.contains("=") || hasHeadings(line, headings)) {
- if (currentKey != null) {
- // time to flush this key and met val
- met.add(currentKey, metVal.toString());
- }
- metVal.setLength(0);
-
- String[] lineToks = line.split("=");
- currentKey = lineToks[0].trim();
- if (lineToks.length == 2) {
- metVal.append(lineToks[1]);
- } else {
- metVal.append("");
- }
- } else {
- metVal.append(line);
- }
-
- }
- }
-
- private boolean hasHeadings(String line, String[] headings){
- if (headings != null && headings.length > 0){
- for(String heading: headings){
- if(line.contains(heading)){
- return true;
- }
- }
- return false;
- }
- else return false;
- }
-
- private void applyPatternsToOutput(String output, Metadata metadata,
- Map<Pattern, String> metadataPatterns) {
- Scanner scanner = new Scanner(output);
- while (scanner.hasNextLine()) {
- String line = scanner.nextLine();
- for (Pattern p : metadataPatterns.keySet()) {
- Matcher m = p.matcher(line);
- if (m.find()) {
- if (metadataPatterns.get(p) != null
- &&
!metadataPatterns.get(p).equals("")) {
-
metadata.add(metadataPatterns.get(p), m.group(1));
- } else {
- metadata.add(m.group(1),
m.group(2));
- }
- }
- }
- }
-
- }
-
- private String execCommand(String[] cmd) throws IOException {
- // Execute
- Process process;
- String output = null;
- if (cmd.length == 1) {
- process = Runtime.getRuntime().exec(cmd[0]);
- } else {
- process = Runtime.getRuntime().exec(cmd);
- }
-
- try {
- InputStream out = process.getInputStream();
-
- try {
- output = extractOutput(out);
- } catch (Exception e) {
- e.printStackTrace();
- output = "";
- }
-
- } finally {
- try {
- process.waitFor();
- } catch (InterruptedException ignore) {
- }
-
- return output;
- }
-
- }
-
- private String extractOutput(InputStream stream) throws SAXException,
- IOException {
- StringBuffer sb = new StringBuffer();
- Reader reader = new InputStreamReader(stream, "UTF-8");
- try {
- char[] buffer = new char[1024];
- for (int n = reader.read(buffer); n != -1; n =
reader.read(buffer)) {
- sb.append(buffer, 0, n);
- }
- } finally {
- reader.close();
- return sb.toString();
- }
- }
-
- private void processOutput(ContentHandler handler, Metadata metadata,
- String output) throws SAXException, IOException {
- XHTMLContentHandler xhtml = new XHTMLContentHandler(handler,
metadata);
- InputStream stream = new
ByteArrayInputStream(output.getBytes("UTF-8"));
- Reader reader = new InputStreamReader(stream, "UTF-8");
- try {
- xhtml.startDocument();
- xhtml.startElement("p");
- char[] buffer = new char[1024];
- for (int n = reader.read(buffer); n != -1; n =
reader.read(buffer)) {
- xhtml.characters(buffer, 0, n);
- }
- xhtml.endElement("p");
-
- } finally {
- reader.close();
- xhtml.endDocument();
- }
+ public GDALParser() {
+ setCommand("gdalinfo ${INPUT}");
+ }
+
+ public void setCommand(String command) {
+ this.command = command;
+ }
+
+ public String getCommand() {
+ return this.command;
+ }
+
+ public String processCommand(InputStream stream) {
+ TikaInputStream tis = (TikaInputStream) stream;
+ String pCommand = this.command;
+ try {
+ if (this.command.indexOf(INPUT_FILE_TOKEN) != -1) {
+ pCommand = this.command.replace(INPUT_FILE_TOKEN, tis.getFile()
+ .getPath());
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ }
+
+ return pCommand;
+ }
+
+ @Override
+ public Set<MediaType> getSupportedTypes(ParseContext context) {
+ Set<MediaType> types = new HashSet<MediaType>();
+ types.add(MediaType.application("x-netcdf"));
+ types.add(MediaType.application("vrt"));
+ types.add(MediaType.image("geotiff"));
+ types.add(MediaType.image("ntif"));
+ types.add(MediaType.application("x-rpf-toc"));
+ types.add(MediaType.application("x-ecrg-toc"));
+ types.add(MediaType.image("hfa"));
+ types.add(MediaType.image("sar-ceos"));
+ types.add(MediaType.image("ceos"));
+ types.add(MediaType.application("jaxa-pal-sar"));
+ types.add(MediaType.application("gff"));
+ types.add(MediaType.application("elas"));
+ types.add(MediaType.application("aig"));
+ types.add(MediaType.application("aaigrid"));
+ types.add(MediaType.application("grass-ascii-grid"));
+ types.add(MediaType.application("sdts-raster"));
+ types.add(MediaType.application("dted"));
+ types.add(MediaType.image("png"));
+ types.add(MediaType.image("jpeg"));
+ types.add(MediaType.image("raster"));
+ types.add(MediaType.application("jdem"));
+ types.add(MediaType.image("gif"));
+ types.add(MediaType.image("big-gif"));
+ types.add(MediaType.image("envisat"));
+ types.add(MediaType.image("fits"));
+ types.add(MediaType.application("fits"));
+ types.add(MediaType.image("bsb"));
+ types.add(MediaType.application("xpm"));
+ types.add(MediaType.image("bmp"));
+ types.add(MediaType.image("x-dimap"));
+ types.add(MediaType.image("x-airsar"));
+ types.add(MediaType.application("x-rs2"));
+ types.add(MediaType.application("x-pcidsk"));
+ types.add(MediaType.application("pcisdk"));
+ types.add(MediaType.image("x-pcraster"));
+ types.add(MediaType.image("ilwis"));
+ types.add(MediaType.image("sgi"));
+ types.add(MediaType.application("x-srtmhgt"));
+ types.add(MediaType.application("leveller"));
+ types.add(MediaType.application("terragen"));
+ types.add(MediaType.application("x-gmt"));
+ types.add(MediaType.application("x-isis3"));
+ types.add(MediaType.application("x-isis2"));
+ types.add(MediaType.application("x-pds"));
+ types.add(MediaType.application("x-til"));
+ types.add(MediaType.application("x-ers"));
+ types.add(MediaType.application("x-l1b"));
+ types.add(MediaType.image("fit"));
+ types.add(MediaType.application("x-grib"));
+ types.add(MediaType.image("jp2"));
+ types.add(MediaType.application("x-rmf"));
+ types.add(MediaType.application("x-wcs"));
+ types.add(MediaType.application("x-wms"));
+ types.add(MediaType.application("x-msgn"));
+ types.add(MediaType.application("x-wms"));
+ types.add(MediaType.application("x-wms"));
+ types.add(MediaType.application("x-rst"));
+ types.add(MediaType.application("x-ingr"));
+ types.add(MediaType.application("x-gsag"));
+ types.add(MediaType.application("x-gsbg"));
+ types.add(MediaType.application("x-gs7bg"));
+ types.add(MediaType.application("x-cosar"));
+ types.add(MediaType.application("x-tsx"));
+ types.add(MediaType.application("x-coasp"));
+ types.add(MediaType.application("x-r"));
+ types.add(MediaType.application("x-map"));
+ types.add(MediaType.application("x-pnm"));
+ types.add(MediaType.application("x-doq1"));
+ types.add(MediaType.application("x-doq2"));
+ types.add(MediaType.application("x-envi"));
+ types.add(MediaType.application("x-envi-hdr"));
+ types.add(MediaType.application("x-generic-bin"));
+ types.add(MediaType.application("x-p-aux"));
+ types.add(MediaType.image("x-mff"));
+ types.add(MediaType.image("x-mff2"));
+ types.add(MediaType.image("x-fujibas"));
+ types.add(MediaType.application("x-gsc"));
+ types.add(MediaType.application("x-fast"));
+ types.add(MediaType.application("x-bt"));
+ types.add(MediaType.application("x-lan"));
+ types.add(MediaType.application("x-cpg"));
+ types.add(MediaType.image("ida"));
+ types.add(MediaType.application("x-ndf"));
+ types.add(MediaType.image("eir"));
+ types.add(MediaType.application("x-dipex"));
+ types.add(MediaType.application("x-lcp"));
+ types.add(MediaType.application("x-gtx"));
+ types.add(MediaType.application("x-los-las"));
+ types.add(MediaType.application("x-ntv2"));
+ types.add(MediaType.application("x-ctable2"));
+ types.add(MediaType.application("x-ace2"));
+ types.add(MediaType.application("x-snodas"));
+ types.add(MediaType.application("x-kro"));
+ types.add(MediaType.image("arg"));
+ types.add(MediaType.application("x-rik"));
+ types.add(MediaType.application("x-usgs-dem"));
+ types.add(MediaType.application("x-gxf"));
+ types.add(MediaType.application("x-dods"));
+ types.add(MediaType.application("x-http"));
+ types.add(MediaType.application("x-bag"));
+ types.add(MediaType.application("x-hdf"));
+ types.add(MediaType.image("x-hdf5-image"));
+ types.add(MediaType.application("x-nwt-grd"));
+ types.add(MediaType.application("x-nwt-grc"));
+ types.add(MediaType.image("adrg"));
+ types.add(MediaType.image("x-srp"));
+ types.add(MediaType.application("x-blx"));
+ types.add(MediaType.application("x-rasterlite"));
+ types.add(MediaType.application("x-epsilon"));
+ types.add(MediaType.application("x-sdat"));
+ types.add(MediaType.application("x-kml"));
+ types.add(MediaType.application("x-xyz"));
+ types.add(MediaType.application("x-geo-pdf"));
+ types.add(MediaType.image("x-ozi"));
+ types.add(MediaType.application("x-ctg"));
+ types.add(MediaType.application("x-e00-grid"));
+ types.add(MediaType.application("x-zmap"));
+ types.add(MediaType.application("x-webp"));
+ types.add(MediaType.application("x-ngs-geoid"));
+ types.add(MediaType.application("x-mbtiles"));
+ types.add(MediaType.application("x-ppi"));
+ types.add(MediaType.application("x-cappi"));
+ return types;
+ }
+
+ @Override
+ public void parse(InputStream stream, ContentHandler handler,
+ Metadata metadata, ParseContext context) throws
IOException,
+ SAXException, TikaException {
+
+ if (!ExternalParser.check("gdalinfo")) {
+ return;
+ }
+
+ // first set up and run GDAL
+ // process the command
+ TemporaryResources tmp = new TemporaryResources();
+ TikaInputStream tis = TikaInputStream.get(stream, tmp);
+
+ String runCommand = processCommand(tis);
+ String output = execCommand(new String[]{runCommand});
+
+ // now extract the actual metadata params
+ // from the GDAL output in the content stream
+ // to do this, we need to literally process the output
+ // from the invoked command b/c we can't read metadata and
+ // output text from the handler in ExternalParser
+ // at the same time, so for now, we can't use the
+ // ExternalParser to do this and I've had to bring some of
+ // that functionality directly into this class
+ // TODO: investigate a way to do both using ExternalParser
+
+ extractMetFromOutput(output, metadata);
+ applyPatternsToOutput(output, metadata, getPatterns());
+
+ // make the content handler and provide output there
+ // now that we have metadata
+ processOutput(handler, metadata, output);
+ }
+
+ private Map<Pattern, String> getPatterns() {
+ Map<Pattern, String> patterns = new HashMap<Pattern, String>();
+ this.addPatternWithColon("Driver", patterns);
+ this.addPatternWithColon("Files", patterns);
+ this.addPatternWithIs("Size", patterns);
+ this.addPatternWithIs("Coordinate System", patterns);
+ this.addBoundingBoxPattern("Upper Left", patterns);
+ this.addBoundingBoxPattern("Lower Left", patterns);
+ this.addBoundingBoxPattern("Upper Right", patterns);
+ this.addBoundingBoxPattern("Lower Right", patterns);
+ return patterns;
+ }
+
+ private void addPatternWithColon(String name, Map<Pattern, String>
patterns) {
+ patterns.put(
+ Pattern.compile(name + "\\:\\s*([A-Za-z0-9/ _\\-\\.]+)\\s*"),
+ name);
+ }
+
+ private void addPatternWithIs(String name, Map<Pattern, String> patterns) {
+ patterns.put(Pattern.compile(name + " is ([A-Za-z0-9\\.,\\s`']+)"),
+ name);
+ }
+
+ private void addBoundingBoxPattern(String name,
+ Map<Pattern, String> patterns) {
+ patterns.put(
+ Pattern.compile(name
+ +
"\\s*\\(\\s*([0-9]+\\.[0-9]+\\s*,\\s*[0-9]+\\.[0-9]+\\s*)\\)\\s*"),
+ name);
+ }
+
+ private void extractMetFromOutput(String output, Metadata met) {
+ Scanner scanner = new Scanner(output);
+ String currentKey = null;
+ String[] headings = {"Subdatasets", "Corner Coordinates"};
+ StringBuilder metVal = new StringBuilder();
+ while (scanner.hasNextLine()) {
+ String line = scanner.nextLine();
+ if (line.contains("=") || hasHeadings(line, headings)) {
+ if (currentKey != null) {
+ // time to flush this key and met val
+ met.add(currentKey, metVal.toString());
+ }
+ metVal.setLength(0);
+
+ String[] lineToks = line.split("=");
+ currentKey = lineToks[0].trim();
+ if (lineToks.length == 2) {
+ metVal.append(lineToks[1]);
+ } else {
+ metVal.append("");
+ }
+ } else {
+ metVal.append(line);
+ }
+
+ }
+ }
+
+ private boolean hasHeadings(String line, String[] headings) {
+ if (headings != null && headings.length > 0) {
+ for (String heading : headings) {
+ if (line.contains(heading)) {
+ return true;
+ }
+ }
+ return false;
+ } else return false;
+ }
+
+ private void applyPatternsToOutput(String output, Metadata metadata,
+ Map<Pattern, String> metadataPatterns) {
+ Scanner scanner = new Scanner(output);
+ while (scanner.hasNextLine()) {
+ String line = scanner.nextLine();
+ for (Pattern p : metadataPatterns.keySet()) {
+ Matcher m = p.matcher(line);
+ if (m.find()) {
+ if (metadataPatterns.get(p) != null
+ && !metadataPatterns.get(p).equals("")) {
+ metadata.add(metadataPatterns.get(p), m.group(1));
+ } else {
+ metadata.add(m.group(1), m.group(2));
+ }
+ }
+ }
+ }
+
+ }
+
+ private String execCommand(String[] cmd) throws IOException {
+ // Execute
+ Process process;
+ String output = null;
+ if (cmd.length == 1) {
+ process = Runtime.getRuntime().exec(cmd[0]);
+ } else {
+ process = Runtime.getRuntime().exec(cmd);
+ }
+
+ try {
+ InputStream out = process.getInputStream();
+
+ try {
+ output = extractOutput(out);
+ } catch (Exception e) {
+ e.printStackTrace();
+ output = "";
+ }
+
+ } finally {
+ try {
+ process.waitFor();
+ } catch (InterruptedException ignore) {
+ }
+
+ return output;
+ }
+
+ }
+
+ private String extractOutput(InputStream stream) throws SAXException,
+ IOException {
+ StringBuffer sb = new StringBuffer();
+ Reader reader = new InputStreamReader(stream, "UTF-8");
+ try {
+ char[] buffer = new char[1024];
+ for (int n = reader.read(buffer); n != -1; n =
reader.read(buffer)) {
+ sb.append(buffer, 0, n);
+ }
+ } finally {
+ reader.close();
+ return sb.toString();
+ }
+ }
+
+ private void processOutput(ContentHandler handler, Metadata metadata,
+ String output) throws SAXException, IOException
{
+ XHTMLContentHandler xhtml = new XHTMLContentHandler(handler, metadata);
+ InputStream stream = new
ByteArrayInputStream(output.getBytes("UTF-8"));
+ Reader reader = new InputStreamReader(stream, "UTF-8");
+ try {
+ xhtml.startDocument();
+ xhtml.startElement("p");
+ char[] buffer = new char[1024];
+ for (int n = reader.read(buffer); n != -1; n =
reader.read(buffer)) {
+ xhtml.characters(buffer, 0, n);
+ }
+ xhtml.endElement("p");
+
+ } finally {
+ reader.close();
+ xhtml.endDocument();
+ }
- }
+ }
}
Modified:
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java?rev=1647743&r1=1647742&r2=1647743&view=diff
==============================================================================
---
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java
(original)
+++
tika/trunk/tika-parsers/src/test/java/org/apache/tika/parser/gdal/TestGDALParser.java
Wed Dec 24 08:16:45 2014
@@ -18,6 +18,7 @@
package org.apache.tika.parser.gdal;
//JDK imports
+
import java.io.InputStream;
//Tika imports
@@ -29,6 +30,7 @@ import org.apache.tika.sax.BodyContentHa
//Junit imports
import org.junit.Test;
+
import static org.junit.Assert.fail;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.assertEquals;
@@ -36,140 +38,138 @@ import static org.junit.Assert.assertNot
import static org.junit.Assume.assumeTrue;
/**
- *
* Test harness for the GDAL parser.
- *
*/
public class TestGDALParser extends TikaTest {
- private boolean canRun() {
- String[] checkCmd = { "gdalinfo" };
- // If GDAL is not on the path, do not run the test.
- return ExternalParser.check(checkCmd);
- }
-
- @Test
- public void testParseBasicInfo() {
- assumeTrue(canRun());
- final String expectedDriver = "netCDF/Network Common Data
Format";
- final String expectedUpperRight = "512.0, 0.0";
- final String expectedUpperLeft = "0.0, 0.0";
- final String expectedLowerLeft = "0.0, 512.0";
- final String expectedLowerRight = "512.0, 512.0";
- final String expectedCoordinateSystem = "`'";
- final String expectedSize = "512, 512";
-
- GDALParser parser = new GDALParser();
- InputStream stream = TestGDALParser.class
-
.getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc");
- Metadata met = new Metadata();
- BodyContentHandler handler = new BodyContentHandler();
- try {
- parser.parse(stream, handler, met, new ParseContext());
- assertNotNull(met);
- assertNotNull(met.get("Driver"));
- assertEquals(expectedDriver, met.get("Driver"));
- assertNotNull(met.get("Files"));
- assertNotNull(met.get("Coordinate System"));
- assertEquals(expectedCoordinateSystem,
met.get("Coordinate System"));
- assertNotNull(met.get("Size"));
- assertEquals(expectedSize, met.get("Size"));
- assertNotNull(met.get("Upper Right"));
- assertEquals(expectedUpperRight, met.get("Upper
Right"));
- assertNotNull(met.get("Upper Left"));
- assertEquals(expectedUpperLeft, met.get("Upper Left"));
- assertNotNull(met.get("Upper Right"));
- assertEquals(expectedLowerRight, met.get("Lower
Right"));
- assertNotNull(met.get("Upper Right"));
- assertEquals(expectedLowerLeft, met.get("Lower Left"));
- } catch (Exception e) {
- e.printStackTrace();
- fail(e.getMessage());
- }
- }
-
- @Test
- public void testParseMetadata() {
- assumeTrue(canRun());
- final String expectedNcInst = "NCAR (National Center for
Atmospheric Research, Boulder, CO, USA)";
- final String expectedModelNameEnglish = "NCAR CCSM";
- final String expectedProgramId = "Source file unknown Version
unknown Date unknown";
- final String expectedProjectId = "IPCC Fourth Assessment";
- final String expectedRealization = "1";
- final String expectedTitle = "model output prepared for IPCC
AR4";
- final String expectedSub8Name = "\":ua";
- final String expectedSub8Desc = "[1x17x128x256] eastward_wind
(32-bit floating-point)";
-
- GDALParser parser = new GDALParser();
- InputStream stream = TestGDALParser.class
-
.getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc");
- Metadata met = new Metadata();
- BodyContentHandler handler = new BodyContentHandler();
- try {
- parser.parse(stream, handler, met, new ParseContext());
- assertNotNull(met);
- assertNotNull(met.get("NC_GLOBAL#institution"));
- assertEquals(expectedNcInst,
met.get("NC_GLOBAL#institution"));
- assertNotNull(met.get("NC_GLOBAL#model_name_english"));
- assertEquals(expectedModelNameEnglish,
-
met.get("NC_GLOBAL#model_name_english"));
- assertNotNull(met.get("NC_GLOBAL#prg_ID"));
- assertEquals(expectedProgramId,
met.get("NC_GLOBAL#prg_ID"));
- assertNotNull(met.get("NC_GLOBAL#prg_ID"));
- assertEquals(expectedProgramId,
met.get("NC_GLOBAL#prg_ID"));
- assertNotNull(met.get("NC_GLOBAL#project_id"));
- assertEquals(expectedProjectId,
met.get("NC_GLOBAL#project_id"));
- assertNotNull(met.get("NC_GLOBAL#realization"));
- assertEquals(expectedRealization,
met.get("NC_GLOBAL#realization"));
- assertNotNull(met.get("NC_GLOBAL#title"));
- assertEquals(expectedTitle, met.get("NC_GLOBAL#title"));
- assertNotNull(met.get("SUBDATASET_8_NAME"));
-
assertTrue(met.get("SUBDATASET_8_NAME").endsWith(expectedSub8Name));
- assertNotNull(met.get("SUBDATASET_8_DESC"));
- assertEquals(expectedSub8Desc,
met.get("SUBDATASET_8_DESC"));
- } catch (Exception e) {
- e.printStackTrace();
- fail(e.getMessage());
- }
- }
+ private boolean canRun() {
+ String[] checkCmd = {"gdalinfo"};
+ // If GDAL is not on the path, do not run the test.
+ return ExternalParser.check(checkCmd);
+ }
+
+ @Test
+ public void testParseBasicInfo() {
+ assumeTrue(canRun());
+ final String expectedDriver = "netCDF/Network Common Data Format";
+ final String expectedUpperRight = "512.0, 0.0";
+ final String expectedUpperLeft = "0.0, 0.0";
+ final String expectedLowerLeft = "0.0, 512.0";
+ final String expectedLowerRight = "512.0, 512.0";
+ final String expectedCoordinateSystem = "`'";
+ final String expectedSize = "512, 512";
+
+ GDALParser parser = new GDALParser();
+ InputStream stream = TestGDALParser.class
+
.getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc");
+ Metadata met = new Metadata();
+ BodyContentHandler handler = new BodyContentHandler();
+ try {
+ parser.parse(stream, handler, met, new ParseContext());
+ assertNotNull(met);
+ assertNotNull(met.get("Driver"));
+ assertEquals(expectedDriver, met.get("Driver"));
+ assertNotNull(met.get("Files"));
+ assertNotNull(met.get("Coordinate System"));
+ assertEquals(expectedCoordinateSystem, met.get("Coordinate
System"));
+ assertNotNull(met.get("Size"));
+ assertEquals(expectedSize, met.get("Size"));
+ assertNotNull(met.get("Upper Right"));
+ assertEquals(expectedUpperRight, met.get("Upper Right"));
+ assertNotNull(met.get("Upper Left"));
+ assertEquals(expectedUpperLeft, met.get("Upper Left"));
+ assertNotNull(met.get("Upper Right"));
+ assertEquals(expectedLowerRight, met.get("Lower Right"));
+ assertNotNull(met.get("Upper Right"));
+ assertEquals(expectedLowerLeft, met.get("Lower Left"));
+ } catch (Exception e) {
+ e.printStackTrace();
+ fail(e.getMessage());
+ }
+ }
+
+ @Test
+ public void testParseMetadata() {
+ assumeTrue(canRun());
+ final String expectedNcInst = "NCAR (National Center for Atmospheric
Research, Boulder, CO, USA)";
+ final String expectedModelNameEnglish = "NCAR CCSM";
+ final String expectedProgramId = "Source file unknown Version unknown
Date unknown";
+ final String expectedProjectId = "IPCC Fourth Assessment";
+ final String expectedRealization = "1";
+ final String expectedTitle = "model output prepared for IPCC AR4";
+ final String expectedSub8Name = "\":ua";
+ final String expectedSub8Desc = "[1x17x128x256] eastward_wind (32-bit
floating-point)";
+
+ GDALParser parser = new GDALParser();
+ InputStream stream = TestGDALParser.class
+
.getResourceAsStream("/test-documents/sresa1b_ncar_ccsm3_0_run1_200001.nc");
+ Metadata met = new Metadata();
+ BodyContentHandler handler = new BodyContentHandler();
+ try {
+ parser.parse(stream, handler, met, new ParseContext());
+ assertNotNull(met);
+ assertNotNull(met.get("NC_GLOBAL#institution"));
+ assertEquals(expectedNcInst, met.get("NC_GLOBAL#institution"));
+ assertNotNull(met.get("NC_GLOBAL#model_name_english"));
+ assertEquals(expectedModelNameEnglish,
+ met.get("NC_GLOBAL#model_name_english"));
+ assertNotNull(met.get("NC_GLOBAL#prg_ID"));
+ assertEquals(expectedProgramId, met.get("NC_GLOBAL#prg_ID"));
+ assertNotNull(met.get("NC_GLOBAL#prg_ID"));
+ assertEquals(expectedProgramId, met.get("NC_GLOBAL#prg_ID"));
+ assertNotNull(met.get("NC_GLOBAL#project_id"));
+ assertEquals(expectedProjectId, met.get("NC_GLOBAL#project_id"));
+ assertNotNull(met.get("NC_GLOBAL#realization"));
+ assertEquals(expectedRealization,
met.get("NC_GLOBAL#realization"));
+ assertNotNull(met.get("NC_GLOBAL#title"));
+ assertEquals(expectedTitle, met.get("NC_GLOBAL#title"));
+ assertNotNull(met.get("SUBDATASET_8_NAME"));
+
assertTrue(met.get("SUBDATASET_8_NAME").endsWith(expectedSub8Name));
+ assertNotNull(met.get("SUBDATASET_8_DESC"));
+ assertEquals(expectedSub8Desc, met.get("SUBDATASET_8_DESC"));
+ } catch (Exception e) {
+ e.printStackTrace();
+ fail(e.getMessage());
+ }
+ }
- @Test
- public void testParseFITS() {
+ @Test
+ public void testParseFITS() {
String fitsFilename = "/test-documents/WFPC2u5780205r_c0fx.fits";
- assumeTrue(canRun());
+ assumeTrue(canRun());
// If the exit code is 1 (meaning FITS isn't supported by the installed version of gdalinfo, don't run this test.
- String[] fitsCommand = { "gdalinfo", TestGDALParser.class.getResource(fitsFilename).getPath() };
+ String[] fitsCommand = {"gdalinfo", TestGDALParser.class.getResource(fitsFilename).getPath()};
assumeTrue(ExternalParser.check(fitsCommand, 1));
- String expectedAllgMin = "-7.319537E1";
- String expectedAtodcorr = "COMPLETE";
- String expectedAtodfile = "uref$dbu1405iu.r1h";
- String expectedCalVersion = " ";
- String expectedCalibDef = "1466";
-
- GDALParser parser = new GDALParser();
- InputStream stream = TestGDALParser.class
- .getResourceAsStream(fitsFilename);
- Metadata met = new Metadata();
- BodyContentHandler handler = new BodyContentHandler();
- try {
- parser.parse(stream, handler, met, new ParseContext());
- assertNotNull(met);
- assertNotNull(met.get("ALLG-MIN"));
- assertEquals(expectedAllgMin, met.get("ALLG-MIN"));
- assertNotNull(met.get("ATODCORR"));
- assertEquals(expectedAtodcorr, met.get("ATODCORR"));
- assertNotNull(met.get("ATODFILE"));
- assertEquals(expectedAtodfile, met.get("ATODFILE"));
- assertNotNull(met.get("CAL_VER"));
- assertEquals(expectedCalVersion, met.get("CAL_VER"));
- assertNotNull(met.get("CALIBDEF"));
- assertEquals(expectedCalibDef, met.get("CALIBDEF"));
-
- } catch (Exception e) {
- e.printStackTrace();
- fail(e.getMessage());
- }
- }
+ String expectedAllgMin = "-7.319537E1";
+ String expectedAtodcorr = "COMPLETE";
+ String expectedAtodfile = "uref$dbu1405iu.r1h";
+ String expectedCalVersion = " ";
+ String expectedCalibDef = "1466";
+
+ GDALParser parser = new GDALParser();
+ InputStream stream = TestGDALParser.class
+ .getResourceAsStream(fitsFilename);
+ Metadata met = new Metadata();
+ BodyContentHandler handler = new BodyContentHandler();
+ try {
+ parser.parse(stream, handler, met, new ParseContext());
+ assertNotNull(met);
+ assertNotNull(met.get("ALLG-MIN"));
+ assertEquals(expectedAllgMin, met.get("ALLG-MIN"));
+ assertNotNull(met.get("ATODCORR"));
+ assertEquals(expectedAtodcorr, met.get("ATODCORR"));
+ assertNotNull(met.get("ATODFILE"));
+ assertEquals(expectedAtodfile, met.get("ATODFILE"));
+ assertNotNull(met.get("CAL_VER"));
+ assertEquals(expectedCalVersion, met.get("CAL_VER"));
+ assertNotNull(met.get("CALIBDEF"));
+ assertEquals(expectedCalibDef, met.get("CALIBDEF"));
+
+ } catch (Exception e) {
+ e.printStackTrace();
+ fail(e.getMessage());
+ }
+ }
}