This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 634f9191f TIKA-3812 -- fix unit test to confirm plain png and jpeg work with config file
634f9191f is described below

commit 634f9191f1a1f3cd21a5ff4311af249663567716
Author: tallison <[email protected]>
AuthorDate: Tue Oct 4 12:27:13 2022 -0400

    TIKA-3812 -- fix unit test to confirm plain png and jpeg work with config file
---
 .../pom.xml                                        |  6 +++
 .../java/org/apache/tika/parser/ocr/TestOCR.java   | 48 +++++++++++++++++++++-
 .../config/tika-config-restricted-gdal.xml         | 32 +++++++++++++++
 3 files changed, 84 insertions(+), 2 deletions(-)

diff --git a/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/pom.xml b/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/pom.xml
index ede345235..dfc679ec7 100644
--- a/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/pom.xml
+++ b/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/pom.xml
@@ -35,6 +35,12 @@
       <version>${project.version}</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>${project.groupId}</groupId>
+      <artifactId>tika-parser-scientific-module</artifactId>
+      <version>${project.version}</version>
+      <scope>test</scope>
+    </dependency>
     <dependency>
       <groupId>${project.groupId}</groupId>
       <artifactId>tika-parser-sqlite3-package</artifactId>
diff --git a/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/java/org/apache/tika/parser/ocr/TestOCR.java b/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/java/org/apache/tika/parser/ocr/TestOCR.java
index e466a34ca..f11ede9bf 100644
--- a/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/java/org/apache/tika/parser/ocr/TestOCR.java
+++ b/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/java/org/apache/tika/parser/ocr/TestOCR.java
@@ -16,16 +16,29 @@
  */
 package org.apache.tika.parser.ocr;
 
+import static org.junit.jupiter.api.Assertions.assertEquals;
 import static org.junit.jupiter.api.Assumptions.assumeTrue;
 
+import java.io.IOException;
+import java.io.InputStream;
 import java.util.List;
+import java.util.Map;
 
 import org.junit.jupiter.api.BeforeAll;
 import org.junit.jupiter.api.Test;
+import org.xml.sax.SAXException;
 
 import org.apache.tika.TikaTest;
+import org.apache.tika.config.TikaConfig;
+import org.apache.tika.exception.TikaException;
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.metadata.TikaCoreProperties;
+import org.apache.tika.mime.MediaType;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.CompositeParser;
+import org.apache.tika.parser.Parser;
+import org.apache.tika.parser.ParserDecorator;
+import org.apache.tika.parser.gdal.GDALParser;
 
 public class TestOCR extends TikaTest {
 
@@ -37,13 +50,44 @@ public class TestOCR extends TikaTest {
 
     @Test
     public void testJPEG() throws Exception {
-        List<Metadata> metadataList = getRecursiveMetadata("testOCR.jpg");
+        List<Metadata> metadataList = getRecursiveMetadata("testOCR.jpg", loadParser());
         assertContains("OCR Testing", metadataList.get(0).get(TikaCoreProperties.TIKA_CONTENT));
     }
 
     @Test
     public void testPNG() throws Exception {
-        List<Metadata> metadataList = getRecursiveMetadata("testOCR.png");
+        List<Metadata> metadataList = getRecursiveMetadata("testOCR.png", loadParser());
         assertContains("file contains", metadataList.get(0).get(TikaCoreProperties.TIKA_CONTENT));
     }
+
+    @Test
+    public void testOthers() throws Exception {
+        Parser p = loadParser();
+        if (p instanceof CompositeParser) {
+            Map<MediaType, Parser> parsers = ((CompositeParser)p).getParsers();
+            Class clz = getParser(MediaType.application("x-netcdf"), parsers);
+            assertEquals(GDALParser.class, clz);
+        }
+    }
+
+    private Class getParser(MediaType mediaType, Map<MediaType, Parser> parsers) {
+        //this is fragile, but works well enough for a unit test
+        Parser p = parsers.get(mediaType);
+        if (p instanceof CompositeParser) {
+            return getParser(mediaType, ((CompositeParser)p).getParsers());
+        } else if (p instanceof ParserDecorator) {
+            Parser decorated = ((ParserDecorator)p).getWrappedParser();
+            return decorated.getClass();
+        }
+        return p.getClass();
+    }
+
+    private Parser loadParser() throws IOException, TikaException, SAXException {
+        try (InputStream is = TestOCR.class.getResourceAsStream(
+                "/config/tika-config-restricted-gdal.xml")) {
+            TikaConfig tikaConfig = new TikaConfig(is);
+            return new AutoDetectParser(tikaConfig);
+        }
+    }
+
 }
diff --git a/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/resources/config/tika-config-restricted-gdal.xml b/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/resources/config/tika-config-restricted-gdal.xml
new file mode 100644
index 000000000..5e6fe2461
--- /dev/null
+++ b/tika-parsers/tika-parsers-extended/tika-parsers-extended-integration-tests/src/test/resources/config/tika-config-restricted-gdal.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<properties>
+  <parsers>
+    <parser class="org.apache.tika.parser.DefaultParser">
+      <!-- don't load this here, load it later -->
+      <parser-exclude class="org.apache.tika.parser.gdal.GDALParser"/>
+    </parser>
+    <!-- this prevents the GDALParser from parsing these file formats -->
+    <parser class="org.apache.tika.parser.gdal.GDALParser">
+      <mime-exclude>image/jpeg</mime-exclude>
+      <mime-exclude>image/png</mime-exclude>
+      <mime-exclude>image/jp2</mime-exclude>
+      <mime-exclude>image/gif</mime-exclude>
+    </parser>
+  </parsers>
+</properties>

Reply via email to