This is an automated email from the ASF dual-hosted git repository.
tilman pushed a commit to branch branch_3x
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/branch_3x by this push:
new 1ae03db8c7 [TIKA-4704] use TemporaryResources to avoid leak (#2725)
1ae03db8c7 is described below
commit 1ae03db8c7542f89e4575f318476a0c23a385051
Author: Tilman Hausherr <[email protected]>
AuthorDate: Tue Mar 31 09:50:29 2026 +0200
[TIKA-4704] use TemporaryResources to avoid leak (#2725)
* [TIKA-4704] use TemporaryResources to avoid leak
Added temporary resources management for TikaInputStream.
* Remove TODO comment about temp file leak
Removed TODO comment regarding temporary file leak in OOXMLParserTest.
* Clean up TODO comments in OOXMLParserTest
Removed TODO comments regarding temporary file leaks in tests.
* revert accidental change by "Binnen-I be gone" plugin
* Fix license URL in OOXMLParserTest.java
* Add import for TemporaryResources in OOXMLExtractorFactory
---
.../apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java | 5 ++++-
.../java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java | 5 +----
2 files changed, 5 insertions(+), 5 deletions(-)
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
index 005e036099..09ac11dc72 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/main/java/org/apache/tika/parser/microsoft/ooxml/OOXMLExtractorFactory.java
@@ -50,6 +50,7 @@ import org.xml.sax.SAXException;
import org.apache.tika.detect.microsoft.ooxml.OPCPackageDetector;
import org.apache.tika.exception.RuntimeSAXException;
import org.apache.tika.exception.TikaException;
+import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaCoreProperties;
@@ -97,11 +98,12 @@ public class OOXMLExtractorFactory {
//if the pkg is in the opencontainer of a TikaInputStream, it will get
closed.
//However, if a regular inputstream has been sent in, we need to
revert the pkg.
boolean mustRevertPackage = false;
+ TemporaryResources tmp = new TemporaryResources();
try {
OOXMLExtractor extractor = null;
// Locate or Open the OPCPackage for the file
- TikaInputStream tis = TikaInputStream.get(stream);
+ TikaInputStream tis = TikaInputStream.get(stream, tmp, metadata);
if (tis.getOpenContainer() instanceof OPCPackageWrapper) {
pkg = ((OPCPackageWrapper)
tis.getOpenContainer()).getOPCPackage();
} else {
@@ -226,6 +228,7 @@ public class OOXMLExtractorFactory {
tmpRepairedCopy.getAbsolutePath());
}
}
+ tmp.dispose();
}
}
diff --git a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
index 60696cc877..b5504dc6ed 100644
--- a/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
+++ b/tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-microsoft-module/src/test/java/org/apache/tika/parser/microsoft/ooxml/OOXMLParserTest.java
@@ -817,7 +817,6 @@ public class OOXMLParserTest extends MultiThreadedTikaTest {
context.set(Locale.class, Locale.US);
new OOXMLParser().parse(input, handler, metadata, context);
}
- //TODO temp file leak
assertEquals("application/vnd.openxmlformats-officedocument.wordprocessingml.document",
metadata.get(Metadata.CONTENT_TYPE));
@@ -852,7 +851,6 @@ public class OOXMLParserTest extends MultiThreadedTikaTest {
context.set(Locale.class, Locale.US);
new OOXMLParser().parse(input, handler, metadata, context);
}
- //TODO temp file leak
assertEquals("application/vnd.openxmlformats-officedocument.presentationml.presentation",
metadata.get(Metadata.CONTENT_TYPE));
@@ -885,7 +883,7 @@ public class OOXMLParserTest extends MultiThreadedTikaTest {
"/test-documents/testWORD_embedded_pdf.docx")) {
new OOXMLParser().parse(input, handler, metadata, new
ParseContext());
}
- //TODO tmp file leak
+
String xml = sw.toString();
int i = xml.indexOf("Here is the pdf file:");
int j = xml.indexOf("<div class=\"embedded\" id=\"rId5\"/>");
@@ -939,7 +937,6 @@ public class OOXMLParserTest extends MultiThreadedTikaTest {
try (InputStream stream =
getResourceAsStream("/test-documents/testWORD_no_format.docx")) {
new OOXMLParser().parse(stream, handler, metadata, new
ParseContext());
}
- //TODO temp file leak
String content = handler.toString();
assertContains("This is a piece of text that causes an exception",
content);