This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git


The following commit(s) were added to refs/heads/main by this push:
     new 758283898 TIKA-1180 -- fixes for pr #2251
758283898 is described below

commit 7582838982a567a0c5e888b414e691128a4d5b4c
Author: tallison <[email protected]>
AuthorDate: Wed Sep 10 13:39:08 2025 -0400

    TIKA-1180 -- fixes for pr #2251
---
 .../org/apache/tika/detect/MatroskaDetector.java   |  17 +++++--
 .../apache/tika/detect/MatroskaDetectorTest.java   |  55 +++++++++++++++++++--
 .../test/resources/test-documents/sample-mkv.noext | Bin 0 -> 5759 bytes
 .../resources/test-documents/sample-webm.noext     | Bin 0 -> 16234 bytes
 .../src/test/resources/test-documents/testMKV.mkv  | Bin
 .../apache/tika/detect/TestDetectorLoading.java    |   8 +--
 6 files changed, 67 insertions(+), 13 deletions(-)

diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/detect/MatroskaDetector.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/detect/MatroskaDetector.java
index 27aa17ba1..b2b25f606 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/detect/MatroskaDetector.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/detect/MatroskaDetector.java
@@ -19,7 +19,8 @@ package org.apache.tika.detect;
 
 import java.io.IOException;
 import java.io.InputStream;
-import java.util.Objects;
+
+import org.apache.commons.io.IOUtils;
 
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
@@ -51,12 +52,18 @@ public class MatroskaDetector implements Detector {
      */
     @Override
     public MediaType detect(InputStream input, Metadata metadata) throws IOException {
-        Objects.requireNonNull(input, "input stream must not be null");
+        if (input == null) {
+            return MediaType.OCTET_STREAM;
+        }
         input.mark(64);
 
         byte[] header = new byte[64];
-        int bytesRead = input.read(header);
-        input.reset();
+        int bytesRead = -1;
+        try {
+            bytesRead = IOUtils.read(input, header, 0, 64);
+        } finally {
+            input.reset();
+        }
 
         if (bytesRead < EBML_HEADER.length) {
             return MediaType.OCTET_STREAM;
@@ -85,4 +92,4 @@ public class MatroskaDetector implements Detector {
 
         return MediaType.OCTET_STREAM;
     }
-}
\ No newline at end of file
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/detect/MatroskaDetectorTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/detect/MatroskaDetectorTest.java
index 32ed8daa1..a893055a0 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/detect/MatroskaDetectorTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/detect/MatroskaDetectorTest.java
@@ -1,13 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied.  See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
 package org.apache.tika.detect;
 
+
 import static org.junit.jupiter.api.Assertions.assertEquals;
 
 import java.io.IOException;
 import java.io.InputStream;
 
+import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream;
+import org.junit.jupiter.api.Test;
+
 import org.apache.tika.metadata.Metadata;
 import org.apache.tika.mime.MediaType;
-import org.junit.jupiter.api.Test;
 
 public class MatroskaDetectorTest {
 
@@ -19,8 +38,36 @@ public class MatroskaDetectorTest {
 
     @Test
     public void testDetectMKV() throws IOException {
-        assertEquals(MediaType.video("x-matroska"),
-                detector.detect(getResourceAsStream("/test-documents/sample.nonexist"),
+        assertEquals(MediaType.application("x-matroska"),
+                detector.detect(getResourceAsStream("/test-documents/sample-mkv.noext"),
+                        new Metadata()));
+
+        assertEquals(MediaType.application("x-matroska"),
+                detector.detect(getResourceAsStream("/test-documents/testMKV.mkv"),
+                        new Metadata()));
+
+
+    }
+
+    @Test
+    public void testDetectWEBM() throws IOException {
+        assertEquals(MediaType.video("webm"),
+                detector.detect(getResourceAsStream("/test-documents/sample-webm.noext"),
                         new Metadata()));
     }
-}
\ No newline at end of file
+
+    @Test
+    public void testNullAndShort() throws Exception {
+        assertEquals(MediaType.OCTET_STREAM,
+                detector.detect(null, new Metadata()));
+
+        byte[] bytes = new byte[10];
+        assertEquals(MediaType.OCTET_STREAM,
+                detector.detect(UnsynchronizedByteArrayInputStream.builder().setByteArray(bytes).get(), new Metadata()));
+
+        bytes = new byte[0];
+        assertEquals(MediaType.OCTET_STREAM,
+                detector.detect(UnsynchronizedByteArrayInputStream.builder().setByteArray(bytes).get(), new Metadata()));
+
+    }
+}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/resources/test-documents/sample-mkv.noext b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/resources/test-documents/sample-mkv.noext
new file mode 100644
index 000000000..55dc0ca79
Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/resources/test-documents/sample-mkv.noext differ
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/resources/test-documents/sample-webm.noext b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/resources/test-documents/sample-webm.noext
new file mode 100644
index 000000000..16324824b
Binary files /dev/null and b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/resources/test-documents/sample-webm.noext differ
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testMKV.mkv b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/resources/test-documents/testMKV.mkv
similarity index 100%
rename from tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testMKV.mkv
rename to tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/resources/test-documents/testMKV.mkv
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java
index 82a9e7df9..28b9b0dd4 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java
@@ -32,13 +32,13 @@ public class TestDetectorLoading {
         //integration test
         Detector detector = TikaConfig.getDefaultConfig().getDetector();
         List<Detector> detectors = ((CompositeDetector) detector).getDetectors();
-        assertEquals(7, detectors.size());
+        assertEquals(8, detectors.size());
         assertEquals("org.gagravarr.tika.OggDetector", detectors.get(0).getClass().getName());
         assertEquals("org.apache.tika.detect.gzip.GZipSpecializationDetector",
-                detectors.get(2).getClass().getName());
+                detectors.get(3).getClass().getName());
 
         assertEquals("org.apache.tika.detect.microsoft.POIFSContainerDetector",
-                detectors.get(3).getClass().getName());
-        assertEquals("org.apache.tika.mime.MimeTypes", detectors.get(6).getClass().getName());
+                detectors.get(4).getClass().getName());
+        assertEquals("org.apache.tika.mime.MimeTypes", detectors.get(7).getClass().getName());
     }
 }

Reply via email to