This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/master by this push:
new 8afa4df TIKA-2861 -- extract lat/long from AppleGPSCoordinatesBox
8afa4df is described below
commit 8afa4df9469b12f1be3dfaa1b2a136e8fa2c7988
Author: TALLISON <[email protected]>
AuthorDate: Thu May 2 11:58:08 2019 -0400
TIKA-2861 -- extract lat/long from AppleGPSCoordinatesBox
---
.../apache/tika/parser/mp4/ISO6709Extractor.java | 87 ++++++++++++++++++++++
.../java/org/apache/tika/parser/mp4/MP4Parser.java | 15 +++-
.../tika/parser/mp4/ISO6709ExtractorTest.java | 50 +++++++++++++
3 files changed, 150 insertions(+), 2 deletions(-)
diff --git
a/tika-parsers/src/main/java/org/apache/tika/parser/mp4/ISO6709Extractor.java
b/tika-parsers/src/main/java/org/apache/tika/parser/mp4/ISO6709Extractor.java
new file mode 100644
index 0000000..d6b7504
--- /dev/null
+++
b/tika-parsers/src/main/java/org/apache/tika/parser/mp4/ISO6709Extractor.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp4;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.apache.tika.metadata.Metadata;
+
+/**
+ * Extracts latitude and longitude, as signed decimal degrees, from the start of an ISO 6709
+ * string such as {@code +40.20361-075.00417/}. Anything after the longitude (e.g. altitude
+ * or CRS) is ignored for now. Based on: https://en.wikipedia.org/wiki/ISO_6709
+ */
+class ISO6709Extractor {
+    //groups 1-3: sign, digits and optional fraction of the latitude; groups 4-6: longitude
+    private static final Pattern ISO6709_PATTERN =
+            Pattern.compile("\\A([-+])(\\d{2,6})(\\.\\d+)?([-+])(\\d{3,7})(\\.\\d+)?");
+
+    /**
+     * Sets {@link Metadata#LATITUDE} and {@link Metadata#LONGITUDE} on {@code m} when
+     * {@code s} starts with a supported coordinate pair; otherwise {@code m} is not touched.
+     * Must be thread safe: this class keeps no mutable state of its own.
+     *
+     * @param s ISO 6709 string, may be {@code null}
+     * @param m metadata to update
+     */
+    public void extract(String s, Metadata m) {
+        if (s == null) {
+            return;
+        }
+        Matcher matcher = ISO6709_PATTERN.matcher(s);
+        if (!matcher.find()) {
+            return; //not recognized -- ignore problems for now?
+        }
+        String lat = toDecimalDegrees(matcher.group(1), matcher.group(2), matcher.group(3), 2);
+        String lng = toDecimalDegrees(matcher.group(4), matcher.group(5), matcher.group(6), 3);
+        //the regex also admits unsupported digit counts; never store an empty coordinate
+        if (lat.isEmpty() || lng.isEmpty()) {
+            return;
+        }
+        m.set(Metadata.LATITUDE, lat);
+        m.set(Metadata.LONGITUDE, lng);
+    }
+
+    /**
+     * Converts one coordinate to decimal degrees. {@code integer} is DD, DDMM or DDMMSS,
+     * where DD is {@code degreeDigits} wide (2 for latitude, 3 for longitude).
+     *
+     * @return the signed value, or an empty string if {@code integer} has another length
+     */
+    private String toDecimalDegrees(String sign, String integer, String flot, int degreeDigits) {
+        String frac = (flot == null) ? "" : flot;
+        String deg = integer.substring(0, degreeDigits);
+        String rest = integer.substring(degreeDigits); //minutes and seconds digits, if any
+        if (rest.isEmpty()) {
+            return sign + integer + frac; //already decimal degrees
+        } else if (rest.length() == 2) {
+            return calcDecimalDegrees(sign, deg, rest + frac, "0");
+        } else if (rest.length() == 4) {
+            return calcDecimalDegrees(sign, deg, rest.substring(0, 2), rest.substring(2) + frac);
+        }
+        return "";
+    }
+
+    private String calcDecimalDegrees(String sign, String deg, String min, String sec) {
+        double d = Integer.parseInt(deg);
+        d += (Double.parseDouble(min) / 60);
+        d += (Double.parseDouble(sec) / 3600);
+        //Locale.ROOT: the default locale may print a decimal comma, which is not parseable
+        return sign + String.format(java.util.Locale.ROOT, "%.8f", d);
+    }
+}
diff --git
a/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
b/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
index afa63b8..a9addee 100644
--- a/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
+++ b/tika-parsers/src/main/java/org/apache/tika/parser/mp4/MP4Parser.java
@@ -39,13 +39,13 @@ import com.googlecode.mp4parser.boxes.apple.AppleCommentBox;
import com.googlecode.mp4parser.boxes.apple.AppleCompilationBox;
import com.googlecode.mp4parser.boxes.apple.AppleDiskNumberBox;
import com.googlecode.mp4parser.boxes.apple.AppleEncoderBox;
+import com.googlecode.mp4parser.boxes.apple.AppleGPSCoordinatesBox;
import com.googlecode.mp4parser.boxes.apple.AppleGenreBox;
import com.googlecode.mp4parser.boxes.apple.AppleNameBox;
import com.googlecode.mp4parser.boxes.apple.AppleRecordingYear2Box;
import com.googlecode.mp4parser.boxes.apple.AppleTrackAuthorBox;
import com.googlecode.mp4parser.boxes.apple.AppleTrackNumberBox;
import com.googlecode.mp4parser.boxes.apple.Utf8AppleDataBox;
-import org.apache.poi.ss.formula.functions.T;
import org.apache.tika.exception.TikaException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
@@ -89,7 +89,6 @@ public class MP4Parser extends AbstractParser {
static {
DURATION_FORMAT.applyPattern("0.0#");
}
-
// Ensure this stays in Sync with the entries in tika-mimetypes.xml
private static final Map<MediaType,List<String>> typesMap = new
HashMap<MediaType, List<String>>();
static {
@@ -112,6 +111,8 @@ public class MP4Parser extends AbstractParser {
private static final Set<MediaType> SUPPORTED_TYPES =
Collections.unmodifiableSet(typesMap.keySet());
+ private ISO6709Extractor iso6709Extractor = new ISO6709Extractor();
+
public Set<MediaType> getSupportedTypes(ParseContext context) {
return SUPPORTED_TYPES;
}
@@ -216,6 +217,7 @@ public class MP4Parser extends AbstractParser {
// Get metadata from the User Data Box
UserDataBox userData = getOrNull(moov, UserDataBox.class);
if (userData != null) {
+ extractGPS(userData, metadata);
MetaBox meta = getOrNull(userData, MetaBox.class);
// Check for iTunes Metadata
@@ -305,6 +307,15 @@ public class MP4Parser extends AbstractParser {
}
+    //Copies the value of the Apple GPS coordinates box, if the user data has one, to metadata
+    private void extractGPS(UserDataBox userData, Metadata metadata) {
+        AppleGPSCoordinatesBox gpsBox = getOrNull(userData, AppleGPSCoordinatesBox.class);
+        if (gpsBox != null) {
+            //the box value is passed as-is to the ISO 6709 extractor
+            iso6709Extractor.extract(gpsBox.getValue(), metadata);
+        }
+    }
+
private static void addMetadata(Property prop, Metadata m,
Utf8AppleDataBox metadata) {
if (metadata != null) {
m.set(prop, metadata.getValue());
diff --git
a/tika-parsers/src/test/java/org/apache/tika/parser/mp4/ISO6709ExtractorTest.java
b/tika-parsers/src/test/java/org/apache/tika/parser/mp4/ISO6709ExtractorTest.java
new file mode 100644
index 0000000..4a36011
--- /dev/null
+++
b/tika-parsers/src/test/java/org/apache/tika/parser/mp4/ISO6709ExtractorTest.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser.mp4;
+
+import static org.junit.Assert.assertEquals;
+
+import org.apache.tika.TikaTest;
+import org.apache.tika.metadata.Metadata;
+import org.junit.Test;
+
+/**
+ * The same position written as DD, DDMM and DDMMSS must yield the same decimal lat/long.
+ */
+public class ISO6709ExtractorTest {
+    @Test
+    public void test() throws Exception {
+        ISO6709Extractor extractor = new ISO6709Extractor();
+        String[] samples = {
+            "+40.20361-075.00417/", //decimal degrees
+            "+4012.22-07500.25/", //degrees and decimal minutes
+            "+401213.1-0750015.1/" //degrees, minutes and decimal seconds
+        };
+        for (String iso6709 : samples) {
+            Metadata metadata = new Metadata();
+            extractor.extract(iso6709, metadata);
+            assertCorrect(metadata);
+        }
+    }
+
+    private void assertCorrect(Metadata m) {
+        double latitude = Double.parseDouble(m.get(Metadata.LATITUDE));
+        double longitude = Double.parseDouble(m.get(Metadata.LONGITUDE));
+        assertEquals(40.20361, latitude, 0.0001);
+        assertEquals(-75.00417, longitude, 0.0001);
+    }
+}