This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 758283898 TIKA-1180 -- fixes for pr #2251
758283898 is described below
commit 7582838982a567a0c5e888b414e691128a4d5b4c
Author: tallison <[email protected]>
AuthorDate: Wed Sep 10 13:39:08 2025 -0400
TIKA-1180 -- fixes for pr #2251
---
.../org/apache/tika/detect/MatroskaDetector.java | 17 +++++--
.../apache/tika/detect/MatroskaDetectorTest.java | 55 +++++++++++++++++++--
.../test/resources/test-documents/sample-mkv.noext | Bin 0 -> 5759 bytes
.../resources/test-documents/sample-webm.noext | Bin 0 -> 16234 bytes
.../src/test/resources/test-documents/testMKV.mkv | Bin
.../apache/tika/detect/TestDetectorLoading.java | 8 +--
6 files changed, 67 insertions(+), 13 deletions(-)
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/detect/MatroskaDetector.java
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/detect/MatroskaDetector.java
index 27aa17ba1..b2b25f606 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/detect/MatroskaDetector.java
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/main/java/org/apache/tika/detect/MatroskaDetector.java
@@ -19,7 +19,8 @@ package org.apache.tika.detect;
import java.io.IOException;
import java.io.InputStream;
-import java.util.Objects;
+
+import org.apache.commons.io.IOUtils;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
@@ -51,12 +52,18 @@ public class MatroskaDetector implements Detector {
*/
@Override
public MediaType detect(InputStream input, Metadata metadata) throws IOException {
- Objects.requireNonNull(input, "input stream must not be null");
+ if (input == null) {
+ return MediaType.OCTET_STREAM;
+ }
input.mark(64);
byte[] header = new byte[64];
- int bytesRead = input.read(header);
- input.reset();
+ int bytesRead = -1;
+ try {
+ bytesRead = IOUtils.read(input, header, 0, 64);
+ } finally {
+ input.reset();
+ }
if (bytesRead < EBML_HEADER.length) {
return MediaType.OCTET_STREAM;
@@ -85,4 +92,4 @@ public class MatroskaDetector implements Detector {
return MediaType.OCTET_STREAM;
}
-}
\ No newline at end of file
+}
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/detect/MatroskaDetectorTest.java
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/detect/MatroskaDetectorTest.java
index 32ed8daa1..a893055a0 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/detect/MatroskaDetectorTest.java
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/java/org/apache/tika/detect/MatroskaDetectorTest.java
@@ -1,13 +1,32 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an "AS IS"
+ * BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+ * or implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ */
package org.apache.tika.detect;
+
import static org.junit.jupiter.api.Assertions.assertEquals;
import java.io.IOException;
import java.io.InputStream;
+import org.apache.commons.io.input.UnsynchronizedByteArrayInputStream;
+import org.junit.jupiter.api.Test;
+
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
-import org.junit.jupiter.api.Test;
public class MatroskaDetectorTest {
@@ -19,8 +38,36 @@ public class MatroskaDetectorTest {
@Test
public void testDetectMKV() throws IOException {
- assertEquals(MediaType.video("x-matroska"),
- detector.detect(getResourceAsStream("/test-documents/sample.nonexist"),
+ assertEquals(MediaType.application("x-matroska"),
+ detector.detect(getResourceAsStream("/test-documents/sample-mkv.noext"),
+ new Metadata()));
+
+ assertEquals(MediaType.application("x-matroska"),
+ detector.detect(getResourceAsStream("/test-documents/testMKV.mkv"),
+ new Metadata()));
+
+
+ }
+
+ @Test
+ public void testDetectWEBM() throws IOException {
+ assertEquals(MediaType.video("webm"),
+ detector.detect(getResourceAsStream("/test-documents/sample-webm.noext"),
new Metadata()));
}
-}
\ No newline at end of file
+
+ @Test
+ public void testNullAndShort() throws Exception {
+ assertEquals(MediaType.OCTET_STREAM,
+ detector.detect(null, new Metadata()));
+
+ byte[] bytes = new byte[10];
+ assertEquals(MediaType.OCTET_STREAM,
+ detector.detect(UnsynchronizedByteArrayInputStream.builder().setByteArray(bytes).get(), new Metadata()));
+
+ bytes = new byte[0];
+ assertEquals(MediaType.OCTET_STREAM,
+ detector.detect(UnsynchronizedByteArrayInputStream.builder().setByteArray(bytes).get(), new Metadata()));
+
+ }
+}
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/resources/test-documents/sample-mkv.noext
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/resources/test-documents/sample-mkv.noext
new file mode 100644
index 000000000..55dc0ca79
Binary files /dev/null and
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/resources/test-documents/sample-mkv.noext
differ
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/resources/test-documents/sample-webm.noext
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/resources/test-documents/sample-webm.noext
new file mode 100644
index 000000000..16324824b
Binary files /dev/null and
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/resources/test-documents/sample-webm.noext
differ
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testMKV.mkv
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/resources/test-documents/testMKV.mkv
similarity index 100%
rename from
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/test-documents/testMKV.mkv
rename to
tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-audiovideo-module/src/test/resources/test-documents/testMKV.mkv
diff --git
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java
index 82a9e7df9..28b9b0dd4 100644
---
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java
+++
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/java/org/apache/tika/detect/TestDetectorLoading.java
@@ -32,13 +32,13 @@ public class TestDetectorLoading {
//integration test
Detector detector = TikaConfig.getDefaultConfig().getDetector();
List<Detector> detectors = ((CompositeDetector) detector).getDetectors();
- assertEquals(7, detectors.size());
+ assertEquals(8, detectors.size());
assertEquals("org.gagravarr.tika.OggDetector",
detectors.get(0).getClass().getName());
assertEquals("org.apache.tika.detect.gzip.GZipSpecializationDetector",
- detectors.get(2).getClass().getName());
+ detectors.get(3).getClass().getName());
assertEquals("org.apache.tika.detect.microsoft.POIFSContainerDetector",
- detectors.get(3).getClass().getName());
- assertEquals("org.apache.tika.mime.MimeTypes", detectors.get(6).getClass().getName());
+ detectors.get(4).getClass().getName());
+ assertEquals("org.apache.tika.mime.MimeTypes", detectors.get(7).getClass().getName());
}
}