This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/master by this push:
     new 8afa4df  TIKA-2861 -- extract lat/long from AppleGPSCoordinatesBox
8afa4df is described below

commit 8afa4df9469b12f1be3dfaa1b2a136e8fa2c7988
Author: TALLISON <[email protected]>
AuthorDate: Thu May 2 11:58:08 2019 -0400

    TIKA-2861 -- extract lat/long from AppleGPSCoordinatesBox
---
 .../apache/tika/parser/mp4/ISO6709Extractor.java   | 87 ++++++++++++++++++++++
 .../java/org/apache/tika/parser/mp4/MP4Parser.java | 15 +++-
 .../tika/parser/mp4/ISO6709ExtractorTest.java      | 50 +++++++++++++
 3 files changed, 150 insertions(+), 2 deletions(-)

diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mp4/ISO6709Extractor.java b/tika-parsers/src/main/java/org/apache/tika/parser/mp4/ISO6709Extractor.java
new file mode 100644
index 0000000..d6b7504
--- /dev/null
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mp4/ISO6709Extractor.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp4;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.tika.metadata.Metadata;
+
+class ISO6709Extractor {
+    //based on: https://en.wikipedia.org/wiki/ISO_6709
+    //strip lat long -- ignore crs for now
+    private static final Pattern ISO6709_PATTERN = Pattern.compile(
+            "\\A([-+])(\\d{2,6})(\\.\\d+)?([-+])(\\d{3,7})(\\.\\d+)?");
+
+    /**
+     * Stores the latitude and longitude found at the start of an
+     * ISO 6709 string in {@code m} as signed decimal degrees. Nothing
+     * is set when {@code s} is null, does not match the pattern, or
+     * has an unsupported number of digits in either coordinate.
+     */
+    //must be thread safe
+    public void extract(String s, Metadata m) {
+        if (s == null) {
+            return;
+        }
+        Matcher matcher = ISO6709_PATTERN.matcher(s);
+        if (!matcher.find()) {
+            return; //ignore problems for now?
+        }
+        String lat = toDecimalDegrees(2, matcher.group(1),
+                matcher.group(2), matcher.group(3));
+        String lng = toDecimalDegrees(3, matcher.group(4),
+                matcher.group(5), matcher.group(6));
+        //set both or neither so an empty value never reaches m
+        if (lat != null && lng != null) {
+            m.set(Metadata.LATITUDE, lat);
+            m.set(Metadata.LONGITUDE, lng);
+        }
+    }
+
+    /**
+     * Converts one coordinate to decimal degrees. {@code integer}
+     * holds {@code degreeDigits} degree digits (2 for latitude, 3 for
+     * longitude), optionally followed by two minute digits and two
+     * second digits; {@code flot} is the fraction of the last field,
+     * including its leading ".", and may be null.
+     * @return the signed value, or null for any other digit count
+     */
+    private String toDecimalDegrees(int degreeDigits, String sign,
+                                    String integer, String flot) {
+        String fraction = (flot == null) ? "" : flot;
+        int extraDigits = integer.length() - degreeDigits;
+        if (extraDigits == 0) {
+            //already decimal degrees: pass the text through as is
+            return sign + integer + fraction;
+        }
+        if (extraDigits != 2 && extraDigits != 4) {
+            return null;
+        }
+        double d = Integer.parseInt(integer.substring(0, degreeDigits));
+        String minutes = integer.substring(degreeDigits, degreeDigits + 2);
+        if (extraDigits == 2) {
+            d += Double.parseDouble(minutes + fraction) / 60;
+        } else {
+            String seconds = integer.substring(degreeDigits + 2);
+            d += Double.parseDouble(minutes) / 60;
+            d += Double.parseDouble(seconds + fraction) / 3600;
+        }
+        //Locale.ROOT keeps "." as the separator in every locale
+        return sign + String.format(java.util.Locale.ROOT, "%.8f", d);
+    }
+}
diff --git a/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java b/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
index afa63b8..a9addee 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
@@ -39,13 +39,13 @@ import com.googlecode.mp4parser.boxes.apple.AppleCommentBox;
 import com.googlecode.mp4parser.boxes.apple.AppleCompilationBox;
 import com.googlecode.mp4parser.boxes.apple.AppleDiskNumberBox;
 import com.googlecode.mp4parser.boxes.apple.AppleEncoderBox;
+import com.googlecode.mp4parser.boxes.apple.AppleGPSCoordinatesBox;
 import com.googlecode.mp4parser.boxes.apple.AppleGenreBox;
 import com.googlecode.mp4parser.boxes.apple.AppleNameBox;
 import com.googlecode.mp4parser.boxes.apple.AppleRecordingYear2Box;
 import com.googlecode.mp4parser.boxes.apple.AppleTrackAuthorBox;
 import com.googlecode.mp4parser.boxes.apple.AppleTrackNumberBox;
 import com.googlecode.mp4parser.boxes.apple.Utf8AppleDataBox;
-import org.apache.poi.ss.formula.functions.T;
 import org.apache.tika.exception.TikaException;
 import org.apache.tika.io.TemporaryResources;
 import org.apache.tika.io.TikaInputStream;
@@ -89,7 +89,6 @@ public class MP4Parser extends AbstractParser {
     static {
         DURATION_FORMAT.applyPattern("0.0#");
     }
-    
     // Ensure this stays in Sync with the entries in tika-mimetypes.xml
     private static final Map<MediaType,List<String>> typesMap = new 
HashMap<MediaType, List<String>>();
     static {
@@ -112,6 +111,8 @@ public class MP4Parser extends AbstractParser {
     private static final Set<MediaType> SUPPORTED_TYPES =
        Collections.unmodifiableSet(typesMap.keySet());
 
+    private ISO6709Extractor iso6709Extractor = new ISO6709Extractor();
+
     public Set<MediaType> getSupportedTypes(ParseContext context) {
         return SUPPORTED_TYPES;
     }
@@ -216,6 +217,7 @@ public class MP4Parser extends AbstractParser {
                 // Get metadata from the User Data Box
                 UserDataBox userData = getOrNull(moov, UserDataBox.class);
                 if (userData != null) {
+                    extractGPS(userData, metadata);
                     MetaBox meta = getOrNull(userData, MetaBox.class);
 
                     // Check for iTunes Metadata
@@ -305,6 +307,15 @@ public class MP4Parser extends AbstractParser {
 
     }
 
+    // Copy lat/long out of the Apple GPS coordinates box, when present.
+    private void extractGPS(UserDataBox userData, Metadata metadata) {
+        AppleGPSCoordinatesBox gpsBox =
+                getOrNull(userData, AppleGPSCoordinatesBox.class);
+        if (gpsBox != null) {
+            iso6709Extractor.extract(gpsBox.getValue(), metadata);
+        }
+    }
+
     private static void addMetadata(Property prop, Metadata m, 
Utf8AppleDataBox metadata) {
        if (metadata != null) {
           m.set(prop, metadata.getValue());
diff --git a/tika-parsers/src/test/java/org/apache/tika/parser/mp4/ISO6709ExtractorTest.java b/tika-parsers/src/test/java/org/apache/tika/parser/mp4/ISO6709ExtractorTest.java
new file mode 100644
index 0000000..4a36011
--- /dev/null
+++ b/tika-parsers/src/test/java/org/apache/tika/parser/mp4/ISO6709ExtractorTest.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp4;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.Metadata;
+import org.junit.Test;
+
+public class ISO6709ExtractorTest {
+    /** Roughly the same point written in three ISO 6709 notations. */
+    private static final String[] SAMPLES = {
+            "+40.20361-075.00417/",  // decimal degrees
+            "+4012.22-07500.25/",    // degrees, decimal minutes
+            "+401213.1-0750015.1/"   // degrees, minutes, decimal seconds
+    };
+
+    @Test
+    public void test() throws Exception {
+        ISO6709Extractor extractor = new ISO6709Extractor();
+        for (String sample : SAMPLES) {
+            Metadata metadata = new Metadata();
+            extractor.extract(sample, metadata);
+            assertCorrect(metadata);
+        }
+    }
+
+    /** Checks both coordinates to within 1e-4 of the expected point. */
+    private void assertCorrect(Metadata m) {
+        double latitude = Double.parseDouble(m.get(Metadata.LATITUDE));
+        double longitude = Double.parseDouble(m.get(Metadata.LONGITUDE));
+        assertEquals(40.20361, latitude, 0.0001);
+        assertEquals(-75.00417, longitude, 0.0001);
+    }
+}

Reply via email to