This is an automated email from the ASF dual-hosted git repository.
tilman pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new c960d38ce1 TIKA-4704: close client so that temp directory gets deleted (#2743)
c960d38ce1 is described below
commit c960d38ce1d05f8d31b3b3bc66231ed1465b7978
Author: Tilman Hausherr <[email protected]>
AuthorDate: Mon Apr 6 17:17:49 2026 +0200
TIKA-4704: close client so that temp directory gets deleted (#2743)
* TIKA-4704: close client so that temp directory gets deleted
* Refactor PipesClient initialization in tests
Refactor FrictionlessUnpackTest to use try-with-resources for PipesClient
initialization, reducing code duplication and improving resource management.
* Refactor try-with-resources syntax in tests
* Refactor try-with-resources syntax in tests
* Refactor testWriteLimiter methods for clarity
* Clean up comments in MetadataWriteLimiterTest
Removed redundant comments in the test case for write limiter override.
* Refactor JSON config string in testWriteLimiterOverride
Updated the JSON configuration string for ParseContext to improve
readability.
* Clean up comments in MetadataWriteLimiterTest
Removed commented code regarding ParseContext override for
X-TIKA:parse_time_millis.
* Update comment to reflect number of embedded documents
* Update tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/MetadataWriteLimiterTest.java
Co-authored-by: Copilot <[email protected]>
* Close pipesClient in PassbackFilterTest
Close pipesClient after emitting data in test.
* Refactor PipesClient usage in tests to try-with-resources
* Update tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/EmbeddedLimitsTest.java
Co-authored-by: Copilot <[email protected]>
* Refactor UnpackModeTest for improved readability
* Refactor PipesClientTest to use try-with-resources
* Refactor UnpackModeTest for clarity and structure
* Update tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/UnpackModeTest.java
Co-authored-by: Copilot <[email protected]>
---------
Co-authored-by: Copilot <[email protected]>
---
.../apache/tika/pipes/core/EmbeddedLimitsTest.java | 279 +++++----
.../tika/pipes/core/FrictionlessUnpackTest.java | 440 +++++++-------
.../tika/pipes/core/MetadataWriteLimiterTest.java | 67 +--
.../apache/tika/pipes/core/PassbackFilterTest.java | 1 +
.../apache/tika/pipes/core/PipesClientTest.java | 206 +++----
.../org/apache/tika/pipes/core/UnpackModeTest.java | 656 ++++++++++-----------
6 files changed, 825 insertions(+), 824 deletions(-)
diff --git
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/EmbeddedLimitsTest.java
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/EmbeddedLimitsTest.java
index 908e04ecf6..58a6f6e28d 100644
---
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/EmbeddedLimitsTest.java
+++
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/EmbeddedLimitsTest.java
@@ -41,7 +41,7 @@ import
org.apache.tika.sax.AbstractRecursiveParserWrapperHandler;
public class EmbeddedLimitsTest {
private static final String FETCHER_NAME = "fsf";
- // mock-embedded.xml has 2 embedded documents
+ // mock-embedded.xml has 4 embedded documents
private static final String TEST_DOC_WITH_EMBEDDED = "mock-embedded.xml";
private PipesClient init(Path tmp, String testFileName) throws Exception {
@@ -64,20 +64,20 @@ public class EmbeddedLimitsTest {
limits.setMaxCount(1);
parseContext.set(EmbeddedLimits.class, limits);
- PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
- new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
- new EmitKey(), new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
-
- assertTrue(pipesResult.isSuccess(), "Parse should succeed");
- // With maxCount=1, we should get the container (1) + 1 embedded = 2
metadata objects
- // Note: The actual count depends on how EmbeddedLimits is applied
- int metadataCount = pipesResult.emitData().getMetadataList().size();
- assertTrue(metadataCount <= 2,
- "Should have at most 2 metadata objects (container + 1
embedded), got: " + metadataCount);
+ try (PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED)) {
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
+ new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new EmitKey(), new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+
+ assertTrue(pipesResult.isSuccess(), "Parse should succeed");
+ // With maxCount=1, we should get the container (1) + 1 embedded =
2 metadata objects
+ // Note: The actual count depends on how EmbeddedLimits is applied
+ int metadataCount =
pipesResult.emitData().getMetadataList().size();
+ assertTrue(metadataCount <= 2,
+ "Should have at most 2 metadata objects (container + 1
embedded), got: " + metadataCount);
+ }
}
@Test
@@ -90,19 +90,19 @@ public class EmbeddedLimitsTest {
limits.setMaxDepth(0);
parseContext.set(EmbeddedLimits.class, limits);
- PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
- new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
- new EmitKey(), new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
-
- assertTrue(pipesResult.isSuccess(), "Parse should succeed");
- // With maxDepth=0, we should only get the container (1 metadata
object)
- int metadataCount = pipesResult.emitData().getMetadataList().size();
- assertEquals(1, metadataCount,
- "Should have only 1 metadata object (container only) with
maxDepth=0");
+ try (PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED)) {
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
+ new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new EmitKey(), new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+
+ assertTrue(pipesResult.isSuccess(), "Parse should succeed");
+ // With maxDepth=0, we should only get the container (1 metadata
object)
+ int metadataCount =
pipesResult.emitData().getMetadataList().size();
+ assertEquals(1, metadataCount,
+ "Should have only 1 metadata object (container only) with
maxDepth=0");
+ }
}
@Test
@@ -110,20 +110,19 @@ public class EmbeddedLimitsTest {
ParseContext parseContext = new ParseContext();
parseContext.set(ParseMode.class, ParseMode.RMETA);
// No limits set - should get all embedded documents
-
- PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
- new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
- new EmitKey(), new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
-
- assertTrue(pipesResult.isSuccess(), "Parse should succeed");
- // Without limits, should get container + all embedded documents
- int metadataCount = pipesResult.emitData().getMetadataList().size();
- assertTrue(metadataCount >= 2,
- "Should have at least 2 metadata objects (container +
embedded), got: " + metadataCount);
+ try (PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED)) {
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
+ new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new EmitKey(), new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+
+ assertTrue(pipesResult.isSuccess(), "Parse should succeed");
+ // Without limits, should get container + all embedded documents
+ int metadataCount =
pipesResult.emitData().getMetadataList().size();
+ assertTrue(metadataCount >= 2,
+ "Should have at least 2 metadata objects (container +
embedded), got: " + metadataCount);
+ }
}
@Test
@@ -139,19 +138,19 @@ public class EmbeddedLimitsTest {
}
""");
- PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
- new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
- new EmitKey(), new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
-
- assertTrue(pipesResult.isSuccess(), "Parse should succeed");
- // With maxCount=1, should have limited embedded documents
- int metadataCount = pipesResult.emitData().getMetadataList().size();
- assertTrue(metadataCount <= 2,
- "Should have at most 2 metadata objects with maxCount=1, got:
" + metadataCount);
+ try (PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED)) {
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
+ new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new EmitKey(), new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+
+ assertTrue(pipesResult.isSuccess(), "Parse should succeed");
+ // With maxCount=1, should have limited embedded documents
+ int metadataCount =
pipesResult.emitData().getMetadataList().size();
+ assertTrue(metadataCount <= 2,
+ "Should have at most 2 metadata objects with maxCount=1,
got: " + metadataCount);
+ }
}
@Test
@@ -165,23 +164,23 @@ public class EmbeddedLimitsTest {
limits.setThrowOnMaxDepth(true);
parseContext.set(EmbeddedLimits.class, limits);
- PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
- new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
- new EmitKey(), new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- // When throwOnMaxDepth=true and limit is exceeded, an exception is
thrown
- // but caught and recorded. Result is still "success" but with
exception.
- // The key behavior: parsing stops early, container metadata is
returned
- assertTrue(pipesResult.isSuccess(), "Parse should complete (with
exception recorded)");
- assertEquals(1, pipesResult.emitData().getMetadataList().size(),
- "Should have only container when maxDepth=0 with exception");
- // The status should indicate an exception was encountered
- assertEquals(PipesResult.RESULT_STATUS.PARSE_SUCCESS_WITH_EXCEPTION,
pipesResult.status(),
- "Should have parse exception status when throwOnMaxDepth=true
and limit exceeded");
+ try (PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED)) {
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
+ new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new EmitKey(), new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ // When throwOnMaxDepth=true and limit is exceeded, an exception
is thrown
+ // but caught and recorded. Result is still "success" but with
exception.
+ // The key behavior: parsing stops early, container metadata is
returned
+ assertTrue(pipesResult.isSuccess(), "Parse should complete (with
exception recorded)");
+ assertEquals(1, pipesResult.emitData().getMetadataList().size(),
+ "Should have only container when maxDepth=0 with
exception");
+ // The status should indicate an exception was encountered
+
assertEquals(PipesResult.RESULT_STATUS.PARSE_SUCCESS_WITH_EXCEPTION,
pipesResult.status(),
+ "Should have parse exception status when
throwOnMaxDepth=true and limit exceeded");
+ }
}
@Test
@@ -195,24 +194,24 @@ public class EmbeddedLimitsTest {
limits.setThrowOnMaxCount(true);
parseContext.set(EmbeddedLimits.class, limits);
- PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
- new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
- new EmitKey(), new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- // When throwOnMaxCount=true and limit is exceeded, an exception is
thrown
- // but caught and recorded. Result is still "success" but with
exception.
- // The key behavior: parsing stops early, limited metadata is returned
- assertTrue(pipesResult.isSuccess(), "Parse should complete (with
exception recorded)");
- int metadataCount = pipesResult.emitData().getMetadataList().size();
- assertTrue(metadataCount <= 2,
- "Should have at most 2 metadata objects with maxCount=1, got:
" + metadataCount);
- // The status should indicate an exception was encountered
- assertEquals(PipesResult.RESULT_STATUS.PARSE_SUCCESS_WITH_EXCEPTION,
pipesResult.status(),
- "Should have parse exception status when throwOnMaxCount=true
and limit exceeded");
+ try (PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED)) {
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
+ new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new EmitKey(), new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ // When throwOnMaxCount=true and limit is exceeded, an exception
is thrown
+ // but caught and recorded. Result is still "success" but with
exception.
+ // The key behavior: parsing stops early, limited metadata is
returned
+ assertTrue(pipesResult.isSuccess(), "Parse should complete (with
exception recorded)");
+ int metadataCount =
pipesResult.emitData().getMetadataList().size();
+ assertTrue(metadataCount <= 2,
+ "Should have at most 2 metadata objects with maxCount=1,
got: " + metadataCount);
+ // The status should indicate an exception was encountered
+
assertEquals(PipesResult.RESULT_STATUS.PARSE_SUCCESS_WITH_EXCEPTION,
pipesResult.status(),
+ "Should have parse exception status when
throwOnMaxCount=true and limit exceeded");
+ }
}
@Test
@@ -225,20 +224,20 @@ public class EmbeddedLimitsTest {
limits.setMaxDepth(2);
parseContext.set(EmbeddedLimits.class, limits);
- PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
- new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
- new EmitKey(), new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
-
- assertTrue(pipesResult.isSuccess(), "Parse should succeed");
- // With maxDepth=2, first-level embedded should be parsed
- // mock-embedded.xml has 4 embedded documents
- int metadataCount = pipesResult.emitData().getMetadataList().size();
- assertTrue(metadataCount >= 2,
- "Should have at least 2 metadata objects with maxDepth=2, got:
" + metadataCount);
+ try (PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED)) {
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
+ new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new EmitKey(), new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+
+ assertTrue(pipesResult.isSuccess(), "Parse should succeed");
+ // With maxDepth=2, first-level embedded should be parsed
+ // mock-embedded.xml has 4 embedded documents
+ int metadataCount =
pipesResult.emitData().getMetadataList().size();
+ assertTrue(metadataCount >= 2,
+ "Should have at least 2 metadata objects with maxDepth=2,
got: " + metadataCount);
+ }
}
@Test
@@ -251,25 +250,25 @@ public class EmbeddedLimitsTest {
limits.setMaxCount(2);
parseContext.set(EmbeddedLimits.class, limits);
- PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
- new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
- new EmitKey(), new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
-
- assertTrue(pipesResult.isSuccess(), "Parse should succeed");
- // With maxCount=2, we should get container + 2 embedded = 3 metadata
objects
- int metadataCount = pipesResult.emitData().getMetadataList().size();
- assertTrue(metadataCount <= 3,
- "Should have at most 3 metadata objects with maxCount=2, got:
" + metadataCount);
-
- // Check that the limit reached flag is set
- Metadata containerMetadata =
pipesResult.emitData().getMetadataList().get(0);
- String limitReached =
containerMetadata.get(AbstractRecursiveParserWrapperHandler.EMBEDDED_RESOURCE_LIMIT_REACHED);
- assertEquals("true", limitReached,
- "Container metadata should have limit reached flag set");
+ try (PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED)) {
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
+ new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new EmitKey(), new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+
+ assertTrue(pipesResult.isSuccess(), "Parse should succeed");
+ // With maxCount=2, we should get container + 2 embedded = 3
metadata objects
+ int metadataCount =
pipesResult.emitData().getMetadataList().size();
+ assertTrue(metadataCount <= 3,
+ "Should have at most 3 metadata objects with maxCount=2,
got: " + metadataCount);
+
+ // Check that the limit reached flag is set
+ Metadata containerMetadata =
pipesResult.emitData().getMetadataList().get(0);
+ String limitReached =
containerMetadata.get(AbstractRecursiveParserWrapperHandler.EMBEDDED_RESOURCE_LIMIT_REACHED);
+ assertEquals("true", limitReached,
+ "Container metadata should have limit reached flag set");
+ }
}
@Test
@@ -283,22 +282,22 @@ public class EmbeddedLimitsTest {
limits.setThrowOnMaxDepth(false);
parseContext.set(EmbeddedLimits.class, limits);
- PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
- new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
- new EmitKey(), new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
-
- assertTrue(pipesResult.isSuccess(), "Parse should succeed without
exception");
- assertEquals(1, pipesResult.emitData().getMetadataList().size(),
- "Should have only container when maxDepth=0");
-
- // Check that the depth limit reached flag is set
- Metadata containerMetadata =
pipesResult.emitData().getMetadataList().get(0);
- String limitReached =
containerMetadata.get(AbstractRecursiveParserWrapperHandler.EMBEDDED_DEPTH_LIMIT_REACHED);
- assertEquals("true", limitReached,
- "Container metadata should have depth limit reached flag set");
+ try (PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED)) {
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
+ new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new EmitKey(), new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+
+ assertTrue(pipesResult.isSuccess(), "Parse should succeed without
exception");
+ assertEquals(1, pipesResult.emitData().getMetadataList().size(),
+ "Should have only container when maxDepth=0");
+
+ // Check that the depth limit reached flag is set
+ Metadata containerMetadata =
pipesResult.emitData().getMetadataList().get(0);
+ String limitReached =
containerMetadata.get(AbstractRecursiveParserWrapperHandler.EMBEDDED_DEPTH_LIMIT_REACHED);
+ assertEquals("true", limitReached,
+ "Container metadata should have depth limit reached flag
set");
+ }
}
}
diff --git
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/FrictionlessUnpackTest.java
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/FrictionlessUnpackTest.java
index d8a240e4f6..deaf5e3561 100644
---
a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/FrictionlessUnpackTest.java
+++
b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/FrictionlessUnpackTest.java
@@ -89,29 +89,29 @@ public class FrictionlessUnpackTest {
Path outputDir = tmp.resolve("output");
Files.createDirectories(outputDir);
- PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(EMITTER_NAME);
- unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS);
- unpackConfig.setOutputMode(UnpackConfig.OUTPUT_MODE.ZIPPED);
- unpackConfig.setZeroPadName(8);
- unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
- parseContext.set(UnpackConfig.class, unpackConfig);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
- new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
- new EmitKey(EMITTER_NAME, TEST_DOC_WITH_EMBEDDED),
- new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(),
- "FRICTIONLESS ZIPPED mode should succeed. Status: " +
pipesResult.status() +
- ", Message: " + pipesResult.message());
+ try (PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(EMITTER_NAME);
+
unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS);
+ unpackConfig.setOutputMode(UnpackConfig.OUTPUT_MODE.ZIPPED);
+ unpackConfig.setZeroPadName(8);
+
unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
+ parseContext.set(UnpackConfig.class, unpackConfig);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
+ new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new EmitKey(EMITTER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(),
+ "FRICTIONLESS ZIPPED mode should succeed. Status: " +
pipesResult.status() +
+ ", Message: " + pipesResult.message());
+ }
// Find the frictionless zip file
List<Path> zipFiles = Files.list(outputDir)
@@ -153,29 +153,29 @@ public class FrictionlessUnpackTest {
Path outputDir = tmp.resolve("output");
Files.createDirectories(outputDir);
- PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(EMITTER_NAME);
- unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS);
- unpackConfig.setOutputMode(UnpackConfig.OUTPUT_MODE.DIRECTORY);
- unpackConfig.setZeroPadName(8);
- unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
- parseContext.set(UnpackConfig.class, unpackConfig);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
- new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
- new EmitKey(EMITTER_NAME, TEST_DOC_WITH_EMBEDDED),
- new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(),
- "FRICTIONLESS DIRECTORY mode should succeed. Status: " +
pipesResult.status() +
- ", Message: " + pipesResult.message());
+ try (PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(EMITTER_NAME);
+
unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS);
+ unpackConfig.setOutputMode(UnpackConfig.OUTPUT_MODE.DIRECTORY);
+ unpackConfig.setZeroPadName(8);
+
unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
+ parseContext.set(UnpackConfig.class, unpackConfig);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
+ new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new EmitKey(EMITTER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(),
+ "FRICTIONLESS DIRECTORY mode should succeed. Status: " +
pipesResult.status() +
+ ", Message: " + pipesResult.message());
+ }
// Check that datapackage.json exists in output
List<Path> dataPackageFiles = Files.walk(outputDir)
@@ -204,27 +204,27 @@ public class FrictionlessUnpackTest {
Path outputDir = tmp.resolve("output");
Files.createDirectories(outputDir);
- PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(EMITTER_NAME);
- unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS);
- unpackConfig.setOutputMode(UnpackConfig.OUTPUT_MODE.ZIPPED);
- unpackConfig.setZeroPadName(8);
- unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
- parseContext.set(UnpackConfig.class, unpackConfig);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
- new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
- new EmitKey(EMITTER_NAME, TEST_DOC_WITH_EMBEDDED),
- new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(), "Processing should succeed");
+ try (PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(EMITTER_NAME);
+
unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS);
+ unpackConfig.setOutputMode(UnpackConfig.OUTPUT_MODE.ZIPPED);
+ unpackConfig.setZeroPadName(8);
+
unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
+ parseContext.set(UnpackConfig.class, unpackConfig);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
+ new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new EmitKey(EMITTER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(), "Processing should succeed");
+ }
// Extract and parse datapackage.json from zip
List<Path> zipFiles = Files.list(outputDir)
@@ -287,27 +287,27 @@ public class FrictionlessUnpackTest {
Path outputDir = tmp.resolve("output");
Files.createDirectories(outputDir);
- PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(EMITTER_NAME);
- unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS);
- unpackConfig.setOutputMode(UnpackConfig.OUTPUT_MODE.ZIPPED);
- unpackConfig.setZeroPadName(8);
- unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
- parseContext.set(UnpackConfig.class, unpackConfig);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
- new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
- new EmitKey(EMITTER_NAME, TEST_DOC_WITH_EMBEDDED),
- new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(), "Processing should succeed");
+ try (PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(EMITTER_NAME);
+
unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS);
+ unpackConfig.setOutputMode(UnpackConfig.OUTPUT_MODE.ZIPPED);
+ unpackConfig.setZeroPadName(8);
+
unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
+ parseContext.set(UnpackConfig.class, unpackConfig);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
+ new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new EmitKey(EMITTER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(), "Processing should succeed");
+ }
// Extract datapackage.json and verify hashes against actual file
content
List<Path> zipFiles = Files.list(outputDir)
@@ -354,30 +354,30 @@ public class FrictionlessUnpackTest {
Path outputDir = tmp.resolve("output");
Files.createDirectories(outputDir);
- PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(EMITTER_NAME);
- unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS);
- unpackConfig.setOutputMode(UnpackConfig.OUTPUT_MODE.ZIPPED);
- unpackConfig.setIncludeFullMetadata(true); // Include metadata.json
- unpackConfig.setZeroPadName(8);
- unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
- parseContext.set(UnpackConfig.class, unpackConfig);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
- new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
- new EmitKey(EMITTER_NAME, TEST_DOC_WITH_EMBEDDED),
- new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(),
- "Processing with includeFullMetadata should succeed. Status: "
+
- pipesResult.status() + ", Message: " +
pipesResult.message());
+ try (PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(EMITTER_NAME);
+
unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS);
+ unpackConfig.setOutputMode(UnpackConfig.OUTPUT_MODE.ZIPPED);
+ unpackConfig.setIncludeFullMetadata(true); // Include
metadata.json
+ unpackConfig.setZeroPadName(8);
+
unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
+ parseContext.set(UnpackConfig.class, unpackConfig);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
+ new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new EmitKey(EMITTER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(),
+ "Processing with includeFullMetadata should succeed.
Status: " +
+ pipesResult.status() + ", Message: " +
pipesResult.message());
+ }
// Find the zip file and verify metadata.json exists
List<Path> zipFiles = Files.list(outputDir)
@@ -429,27 +429,27 @@ public class FrictionlessUnpackTest {
Path outputDir = tmp.resolve("output");
Files.createDirectories(outputDir);
- PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- // No DigesterFactory in parseContext - should auto-add SHA256
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(EMITTER_NAME);
- unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS);
- unpackConfig.setOutputMode(UnpackConfig.OUTPUT_MODE.ZIPPED);
- parseContext.set(UnpackConfig.class, unpackConfig);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
- new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
- new EmitKey(EMITTER_NAME, TEST_DOC_WITH_EMBEDDED),
- new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(),
- "Processing without DigesterFactory should succeed");
+ try (PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ // No DigesterFactory in parseContext - should auto-add SHA256
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(EMITTER_NAME);
+
unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS);
+ unpackConfig.setOutputMode(UnpackConfig.OUTPUT_MODE.ZIPPED);
+ parseContext.set(UnpackConfig.class, unpackConfig);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
+ new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new EmitKey(EMITTER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(),
+ "Processing without DigesterFactory should succeed");
+ }
// Verify hashes are still computed (auto-added digester)
List<Path> zipFiles = Files.list(outputDir)
@@ -489,32 +489,32 @@ public class FrictionlessUnpackTest {
TikaJsonConfig tikaJsonConfig = TikaJsonConfig.load(pipesConfigPath);
PipesConfig pipesConfig = PipesConfig.load(tikaJsonConfig);
- PipesClient pipesClient = new PipesClient(pipesConfig, pipesConfigPath);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(EMITTER_NAME);
- unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS);
- unpackConfig.setOutputMode(UnpackConfig.OUTPUT_MODE.ZIPPED);
- parseContext.set(UnpackConfig.class, unpackConfig);
-
- // Add selector to only include XML files
- // (The mock embedded files are XML)
- StandardUnpackSelector selector = new StandardUnpackSelector();
- selector.setIncludeMimeTypes(Set.of("application/mock+xml", "application/xml", "text/xml"));
- parseContext.set(UnpackSelector.class, selector);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
- new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
- new EmitKey(EMITTER_NAME, TEST_DOC_WITH_EMBEDDED),
- new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(),
- "Processing with UnpackSelector should succeed");
+ try (PipesClient pipesClient = new PipesClient(pipesConfig, pipesConfigPath)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(EMITTER_NAME);
+
unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS);
+ unpackConfig.setOutputMode(UnpackConfig.OUTPUT_MODE.ZIPPED);
+ parseContext.set(UnpackConfig.class, unpackConfig);
+
+ // Add selector to only include XML files
+ // (The mock embedded files are XML)
+ StandardUnpackSelector selector = new StandardUnpackSelector();
+ selector.setIncludeMimeTypes(Set.of("application/mock+xml", "application/xml", "text/xml"));
+ parseContext.set(UnpackSelector.class, selector);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
+ new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new EmitKey(EMITTER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(),
+ "Processing with UnpackSelector should succeed");
+ }
// Verify that filtering was applied
List<Path> zipFiles = Files.list(outputDir)
@@ -547,26 +547,26 @@ public class FrictionlessUnpackTest {
Path outputDir = tmp.resolve("output");
Files.createDirectories(outputDir);
- PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(EMITTER_NAME);
- unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.REGULAR); // Explicit REGULAR
- unpackConfig.setZipEmbeddedFiles(true); // Use zip output for comparison
- parseContext.set(UnpackConfig.class, unpackConfig);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
- new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
- new EmitKey(EMITTER_NAME, TEST_DOC_WITH_EMBEDDED),
- new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(),
- "REGULAR format should still work. Status: " +
pipesResult.status());
+ try (PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(EMITTER_NAME);
+ unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.REGULAR); // Explicit REGULAR
+ unpackConfig.setZipEmbeddedFiles(true); // Use zip output for comparison
+ parseContext.set(UnpackConfig.class, unpackConfig);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
+ new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new EmitKey(EMITTER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(),
+ "REGULAR format should still work. Status: " +
pipesResult.status());
+ }
// Should create -embedded.zip (not -frictionless.zip)
List<Path> regularZips = Files.list(outputDir)
@@ -590,26 +590,26 @@ public class FrictionlessUnpackTest {
Path outputDir = tmp.resolve("output");
Files.createDirectories(outputDir);
- PipesClient pipesClient = init(tmp, simpleDoc);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(EMITTER_NAME);
- unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS);
- unpackConfig.setOutputMode(UnpackConfig.OUTPUT_MODE.ZIPPED);
- parseContext.set(UnpackConfig.class, unpackConfig);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(simpleDoc,
- new FetchKey(FETCHER_NAME, simpleDoc),
- new EmitKey(EMITTER_NAME, simpleDoc),
- new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(),
- "Frictionless should succeed with no embedded files");
+ try (PipesClient pipesClient = init(tmp, simpleDoc)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(EMITTER_NAME);
+ unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS);
+ unpackConfig.setOutputMode(UnpackConfig.OUTPUT_MODE.ZIPPED);
+ parseContext.set(UnpackConfig.class, unpackConfig);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(simpleDoc,
+ new FetchKey(FETCHER_NAME, simpleDoc),
+ new EmitKey(EMITTER_NAME, simpleDoc),
+ new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(),
+ "Frictionless should succeed with no embedded files");
+ }
// Should either not create zip or create zip with empty resources
List<Path> zipFiles = Files.list(outputDir)
@@ -639,27 +639,27 @@ public class FrictionlessUnpackTest {
Path outputDir = tmp.resolve("output");
Files.createDirectories(outputDir);
- PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(EMITTER_NAME);
- unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS);
- unpackConfig.setOutputMode(UnpackConfig.OUTPUT_MODE.ZIPPED);
- unpackConfig.setIncludeOriginal(true); // Include container document
- parseContext.set(UnpackConfig.class, unpackConfig);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
- new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
- new EmitKey(EMITTER_NAME, TEST_DOC_WITH_EMBEDDED),
- new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(),
- "Frictionless with includeOriginal should succeed");
+ try (PipesClient pipesClient = init(tmp, TEST_DOC_WITH_EMBEDDED)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(EMITTER_NAME);
+ unpackConfig.setOutputFormat(UnpackConfig.OUTPUT_FORMAT.FRICTIONLESS);
+ unpackConfig.setOutputMode(UnpackConfig.OUTPUT_MODE.ZIPPED);
+ unpackConfig.setIncludeOriginal(true); // Include container document
+ parseContext.set(UnpackConfig.class, unpackConfig);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC_WITH_EMBEDDED,
+ new FetchKey(FETCHER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new EmitKey(EMITTER_NAME, TEST_DOC_WITH_EMBEDDED),
+ new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(),
+ "Frictionless with includeOriginal should succeed");
+ }
List<Path> zipFiles = Files.list(outputDir)
.filter(p -> p.toString().endsWith("-frictionless.zip"))
diff --git a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/MetadataWriteLimiterTest.java b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/MetadataWriteLimiterTest.java
index d3d6b0fefa..0a763dff1c 100644
--- a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/MetadataWriteLimiterTest.java
+++ b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/MetadataWriteLimiterTest.java
@@ -59,16 +59,15 @@ public class MetadataWriteLimiterTest {
*/
@Test
public void testWriteLimiterFromConfig(@TempDir Path tmp) throws Exception {
- PipesClient pipesClient = initWithWriteLimiter(tmp, TEST_DOC);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC, new FetchKey(FETCHER_NAME,
TEST_DOC),
- new EmitKey(), new Metadata(), new ParseContext(),
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
-
- assertNotNull(pipesResult.emitData().getMetadataList());
- assertEquals(1, pipesResult.emitData().getMetadataList().size());
-
- Metadata metadata = pipesResult.emitData().getMetadataList().get(0);
+ Metadata metadata;
+ try (PipesClient pipesClient = initWithWriteLimiter(tmp, TEST_DOC)) {
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC, new FetchKey(FETCHER_NAME,
TEST_DOC),
+ new EmitKey(), new Metadata(), new ParseContext(),
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+ assertNotNull(pipesResult.emitData().getMetadataList());
+ assertEquals(1, pipesResult.emitData().getMetadataList().size());
+ metadata = pipesResult.emitData().getMetadataList().get(0);
+ }
// These fields should be present (in includeFields or "must add" fields)
assertNotNull(metadata.get("Content-Type"), "Content-Type should be present");
@@ -88,31 +87,29 @@ public class MetadataWriteLimiterTest {
*/
@Test
public void testWriteLimiterOverrideViaParseContext(@TempDir Path tmp) throws Exception {
- PipesClient pipesClient = initWithWriteLimiter(tmp, TEST_DOC);
-
- // Create a ParseContext with an override that allows
X-TIKA:parse_time_millis
- // The default config's includeFields (dc:creator, Content-Type,
X-TIKA:content)
- // does NOT include X-TIKA:parse_time_millis, but this override does.
- ParseContext parseContext = new ParseContext();
- String overrideJson = """
- {
- "includeFields": ["Content-Type",
"X-TIKA:parse_time_millis"],
- "maxKeySize": 100,
- "maxFieldSize": 1000,
- "maxTotalBytes": 10000,
- "maxValuesPerField": 5
- }
- """;
- parseContext.setJsonConfig("standard-metadata-limiter-factory", () ->
overrideJson);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(TEST_DOC, new FetchKey(FETCHER_NAME,
TEST_DOC),
- new EmitKey(), new Metadata(), parseContext,
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
-
- assertNotNull(pipesResult.emitData().getMetadataList());
- assertEquals(1, pipesResult.emitData().getMetadataList().size());
-
- Metadata metadata = pipesResult.emitData().getMetadataList().get(0);
+ Metadata metadata;
+ try (PipesClient pipesClient = initWithWriteLimiter(tmp, TEST_DOC)) {
+ // Create a ParseContext with an override that allows
X-TIKA:parse_time_millis
+ // The default config's includeFields (dc:creator, Content-Type,
X-TIKA:content)
+ // does NOT include X-TIKA:parse_time_millis, but this override
does.
+ ParseContext parseContext = new ParseContext();
+ String overrideJson = """
+ {
+ "includeFields": ["Content-Type",
"X-TIKA:parse_time_millis"],
+ "maxKeySize": 100,
+ "maxFieldSize": 1000,
+ "maxTotalBytes": 10000,
+ "maxValuesPerField": 5
+ }
+ """;
+ parseContext.setJsonConfig("standard-metadata-limiter-factory", ()
-> overrideJson);
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(TEST_DOC, new FetchKey(FETCHER_NAME,
TEST_DOC),
+ new EmitKey(), new Metadata(), parseContext,
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+ assertNotNull(pipesResult.emitData().getMetadataList());
+ assertEquals(1, pipesResult.emitData().getMetadataList().size());
+ metadata = pipesResult.emitData().getMetadataList().get(0);
+ }
// These fields should be present (in the override includeFields or
ALWAYS_SET/ADD_FIELDS)
assertNotNull(metadata.get("Content-Type"), "Content-Type should be
present");
diff --git a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PassbackFilterTest.java b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PassbackFilterTest.java
index 853136855d..03af3cdef1 100644
--- a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PassbackFilterTest.java
+++ b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PassbackFilterTest.java
@@ -81,6 +81,7 @@ public class PassbackFilterTest {
.emitData()
.getMetadataList()
.get(0);
+ pipesClient.close();
assertEquals("TESTOVERLAPPINGTEXT.PDF",
metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
assertNull(metadata.get(Metadata.CONTENT_TYPE));
assertNull(metadata.get(Metadata.CONTENT_LENGTH));
diff --git a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PipesClientTest.java b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PipesClientTest.java
index 7ab8a7f3a5..5b6e9dfad0 100644
--- a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PipesClientTest.java
+++ b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/PipesClientTest.java
@@ -59,15 +59,15 @@ public class PipesClientTest {
@Test
public void testBasic(@TempDir Path tmp) throws Exception {
- PipesClient pipesClient = init(tmp, testDoc);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDoc, new FetchKey(fetcherName, testDoc),
- new EmitKey(), new Metadata(), new ParseContext(),
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
- Assertions.assertNotNull(pipesResult.emitData().getMetadataList());
- assertEquals(1, pipesResult.emitData().getMetadataList().size());
- Metadata metadata = pipesResult.emitData().getMetadataList().get(0);
- assertEquals("testOverlappingText.pdf",
metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
+ try (PipesClient pipesClient = init(tmp, testDoc)) {
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDoc, new FetchKey(fetcherName,
testDoc),
+ new EmitKey(), new Metadata(), new ParseContext(),
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+ Assertions.assertNotNull(pipesResult.emitData().getMetadataList());
+ assertEquals(1, pipesResult.emitData().getMetadataList().size());
+ Metadata metadata =
pipesResult.emitData().getMetadataList().get(0);
+ assertEquals("testOverlappingText.pdf",
metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
+ }
}
@Test
@@ -78,14 +78,15 @@ public class PipesClientTest {
parseContext.setJsonConfig("metadata-filters", """
["mock-upper-case-filter"]
""");
- PipesClient pipesClient = init(tmp, testDoc);
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDoc, new FetchKey(fetcherName, testDoc),
- new EmitKey(), new Metadata(), parseContext,
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
- Assertions.assertNotNull(pipesResult.emitData().getMetadataList());
- assertEquals(1, pipesResult.emitData().getMetadataList().size());
- Metadata metadata = pipesResult.emitData().getMetadataList().get(0);
- assertEquals("TESTOVERLAPPINGTEXT.PDF",
metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
+ try (PipesClient pipesClient = init(tmp, testDoc)) {
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDoc, new FetchKey(fetcherName,
testDoc),
+ new EmitKey(), new Metadata(), parseContext,
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+ Assertions.assertNotNull(pipesResult.emitData().getMetadataList());
+ assertEquals(1, pipesResult.emitData().getMetadataList().size());
+ Metadata metadata =
pipesResult.emitData().getMetadataList().get(0);
+ assertEquals("TESTOVERLAPPINGTEXT.PDF",
metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
+ }
}
@Test
@@ -99,15 +100,15 @@ public class PipesClientTest {
String testFile = "mock-embedded.xml";
- PipesClient pipesClient = init(tmp, testFile);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testFile, new FetchKey(fetcherName,
testFile),
- new EmitKey(), new Metadata(), parseContext,
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
- Assertions.assertNotNull(pipesResult.emitData().getMetadataList());
- assertEquals(5, pipesResult.emitData().getMetadataList().size());
- Metadata metadata = pipesResult.emitData().getMetadataList().get(0);
- assertEquals(4,
Integer.parseInt(metadata.get("X-TIKA:attachment_count")));
+ try (PipesClient pipesClient = init(tmp, testFile)) {
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testFile, new FetchKey(fetcherName,
testFile),
+ new EmitKey(), new Metadata(), parseContext,
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+ Assertions.assertNotNull(pipesResult.emitData().getMetadataList());
+ assertEquals(5, pipesResult.emitData().getMetadataList().size());
+ Metadata metadata =
pipesResult.emitData().getMetadataList().get(0);
+ assertEquals(4,
Integer.parseInt(metadata.get("X-TIKA:attachment_count")));
+ }
}
@Test
@@ -121,16 +122,17 @@ public class PipesClientTest {
]
""");
- PipesClient pipesClient = init(tmp, testDoc);
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDoc, new FetchKey(fetcherName, testDoc),
- new EmitKey(), new Metadata(), parseContext,
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
-
- Assertions.assertNotNull(pipesResult.emitData().getMetadataList());
- assertEquals(1, pipesResult.emitData().getMetadataList().size());
- Metadata metadata = pipesResult.emitData().getMetadataList().get(0);
- // MockUpperCaseFilter uppercases all metadata values
- assertEquals("TESTOVERLAPPINGTEXT.PDF",
metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
+ try (PipesClient pipesClient = init(tmp, testDoc)) {
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDoc, new FetchKey(fetcherName,
testDoc),
+ new EmitKey(), new Metadata(), parseContext,
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+
+ Assertions.assertNotNull(pipesResult.emitData().getMetadataList());
+ assertEquals(1, pipesResult.emitData().getMetadataList().size());
+ Metadata metadata =
pipesResult.emitData().getMetadataList().get(0);
+ // MockUpperCaseFilter uppercases all metadata values
+ assertEquals("TESTOVERLAPPINGTEXT.PDF",
metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY));
+ }
}
@Test
@@ -145,15 +147,15 @@ public class PipesClientTest {
""");
String testFile = "mock-embedded.xml";
- PipesClient pipesClient = init(tmp, testFile);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testFile, new FetchKey(fetcherName,
testFile),
- new EmitKey(), new Metadata(), parseContext,
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
-
- Assertions.assertNotNull(pipesResult.emitData().getMetadataList());
- assertEquals(5, pipesResult.emitData().getMetadataList().size());
- Metadata metadata = pipesResult.emitData().getMetadataList().get(0);
+ Metadata metadata;
+ try (PipesClient pipesClient = init(tmp, testFile)) {
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testFile, new FetchKey(fetcherName,
testFile),
+ new EmitKey(), new Metadata(), parseContext,
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+ Assertions.assertNotNull(pipesResult.emitData().getMetadataList());
+ assertEquals(5, pipesResult.emitData().getMetadataList().size());
+ metadata = pipesResult.emitData().getMetadataList().get(0);
+ }
// AttachmentCountingListFilter should have added the count
assertEquals(4,
Integer.parseInt(metadata.get("X-TIKA:attachment_count")));
@@ -175,11 +177,12 @@ public class PipesClientTest {
""");
String testFile = "mock-timeout-10s.xml";
- PipesClient pipesClient = init(tmp, testFile);
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testFile, new FetchKey(fetcherName,
testFile),
- new EmitKey(), new Metadata(), parseContext,
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
- assertEquals(PipesResults.TIMEOUT.status(), pipesResult.status());
+ try (PipesClient pipesClient = init(tmp, testFile)) {
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testFile, new FetchKey(fetcherName,
testFile),
+ new EmitKey(), new Metadata(), parseContext,
FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+ assertEquals(PipesResults.TIMEOUT.status(), pipesResult.status());
+ }
}
@Test
@@ -225,7 +228,7 @@ public class PipesClientTest {
assertEquals(PipesResult.RESULT_STATUS.PARSE_SUCCESS,
successResult.status(),
"Should succeed with 10 second timeout on 3 second file");
Assertions.assertNotNull(successResult.emitData().getMetadataList());
- assertTrue(successResult.emitData().getMetadataList().size() > 0);
+ assertFalse(successResult.emitData().getMetadataList().isEmpty());
}
}
@@ -717,29 +720,29 @@ public class PipesClientTest {
@Test
public void testContentOnlyMode(@TempDir Path tmp) throws Exception {
// Test that CONTENT_ONLY mode strips all metadata except
X-TIKA:content
- PipesClient pipesClient = init(tmp, testDoc);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.CONTENT_ONLY);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDoc, new FetchKey(fetcherName, testDoc),
- new EmitKey(), new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
- assertNotNull(pipesResult.emitData().getMetadataList());
- assertEquals(1, pipesResult.emitData().getMetadataList().size());
- Metadata metadata = pipesResult.emitData().getMetadataList().get(0);
-
- // Content should be present
- String content = metadata.get(TikaCoreProperties.TIKA_CONTENT);
- assertNotNull(content, "TIKA_CONTENT should be present in CONTENT_ONLY
mode");
- assertFalse(content.isEmpty(), "TIKA_CONTENT should not be empty");
-
- // Other metadata should be stripped by the IncludeFieldMetadataFilter
- assertNull(metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY),
- "RESOURCE_NAME should be stripped in CONTENT_ONLY mode");
- assertNull(metadata.get(Metadata.CONTENT_TYPE),
- "CONTENT_TYPE should be stripped in CONTENT_ONLY mode");
+ try (PipesClient pipesClient = init(tmp, testDoc)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.CONTENT_ONLY);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDoc, new FetchKey(fetcherName,
testDoc),
+ new EmitKey(), new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+ assertNotNull(pipesResult.emitData().getMetadataList());
+ assertEquals(1, pipesResult.emitData().getMetadataList().size());
+ Metadata metadata =
pipesResult.emitData().getMetadataList().get(0);
+
+ // Content should be present
+ String content = metadata.get(TikaCoreProperties.TIKA_CONTENT);
+ assertNotNull(content, "TIKA_CONTENT should be present in CONTENT_ONLY mode");
+ assertFalse(content.isEmpty(), "TIKA_CONTENT should not be empty");
+
+ // Other metadata should be stripped by the
IncludeFieldMetadataFilter
+ assertNull(metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY),
+ "RESOURCE_NAME should be stripped in CONTENT_ONLY mode");
+ assertNull(metadata.get(Metadata.CONTENT_TYPE),
+ "CONTENT_TYPE should be stripped in CONTENT_ONLY mode");
+ }
}
@Test
@@ -752,20 +755,21 @@ public class PipesClientTest {
["mock-upper-case-filter"]
""");
- PipesClient pipesClient = init(tmp, testDoc);
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDoc, new FetchKey(fetcherName, testDoc),
- new EmitKey(), new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
- assertNotNull(pipesResult.emitData().getMetadataList());
- assertEquals(1, pipesResult.emitData().getMetadataList().size());
- Metadata metadata = pipesResult.emitData().getMetadataList().get(0);
-
- // User filter (uppercase) should take effect instead of CONTENT_ONLY
filter
- // So all metadata should still be present (but uppercased)
- assertEquals("TESTOVERLAPPINGTEXT.PDF",
- metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY),
- "User filter should take priority over CONTENT_ONLY filter");
+ try (PipesClient pipesClient = init(tmp, testDoc)) {
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDoc, new FetchKey(fetcherName,
testDoc),
+ new EmitKey(), new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+ assertNotNull(pipesResult.emitData().getMetadataList());
+ assertEquals(1, pipesResult.emitData().getMetadataList().size());
+ Metadata metadata =
pipesResult.emitData().getMetadataList().get(0);
+
+ // User filter (uppercase) should take effect instead of
CONTENT_ONLY filter
+ // So all metadata should still be present (but uppercased)
+ assertEquals("TESTOVERLAPPINGTEXT.PDF",
+ metadata.get(TikaCoreProperties.RESOURCE_NAME_KEY),
+ "User filter should take priority over CONTENT_ONLY
filter");
+ }
}
@Test
@@ -827,19 +831,19 @@ public class PipesClientTest {
// Test that CONCATENATE mode returns a single metadata object with
content
// but preserves all metadata fields (unlike CONTENT_ONLY)
String testFile = "mock-embedded.xml";
- PipesClient pipesClient = init(tmp, testFile);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.CONCATENATE);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testFile, new FetchKey(fetcherName,
testFile),
- new EmitKey(), new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
- assertNotNull(pipesResult.emitData().getMetadataList());
- // CONCATENATE produces a single metadata object (not one per embedded
doc)
- assertEquals(1, pipesResult.emitData().getMetadataList().size());
- Metadata metadata = pipesResult.emitData().getMetadataList().get(0);
+ Metadata metadata;
+ try (PipesClient pipesClient = init(tmp, testFile)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.CONCATENATE);
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testFile, new FetchKey(fetcherName,
testFile),
+ new EmitKey(), new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.SKIP));
+ assertNotNull(pipesResult.emitData().getMetadataList());
+ // CONCATENATE produces a single metadata object (not one per
embedded doc)
+ assertEquals(1, pipesResult.emitData().getMetadataList().size());
+ metadata = pipesResult.emitData().getMetadataList().get(0);
+ }
// Content should be present
String content = metadata.get(TikaCoreProperties.TIKA_CONTENT);
diff --git a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/UnpackModeTest.java b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/UnpackModeTest.java
index cb037710e1..057dfa4eb0 100644
--- a/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/UnpackModeTest.java
+++ b/tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/UnpackModeTest.java
@@ -63,109 +63,109 @@ public class UnpackModeTest {
@Test
public void testUnpackModeBasic(@TempDir Path tmp) throws Exception {
// Test that UNPACK mode works and returns metadata like RMETA
- PipesClient pipesClient = init(tmp, testDocWithEmbedded);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
- new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(), "UNPACK mode should succeed.
Status: " + pipesResult.status() +
- ", Message: " + pipesResult.message());
-
- // UNPACK mode may return EMIT_SUCCESS (without emitData) if passback
filter is not used
- // Check if we have emitData, otherwise just verify success
- if (pipesResult.emitData() != null &&
pipesResult.emitData().getMetadataList() != null) {
- // With RMETA-like behavior, we should get metadata for container
+ embedded docs
- // mock-embedded.xml has 4 embedded documents, so we expect 5
metadata objects
- List<Metadata> metadataList =
pipesResult.emitData().getMetadataList();
- assertEquals(5, metadataList.size(),
- "UNPACK should return RMETA-style metadata list (container
+ 4 embedded docs)");
-
- // Verify container metadata
- assertEquals("Nikolai Lobachevsky",
metadataList.get(0).get("author"));
-
- // Verify embedded metadata
- for (int i = 1; i < metadataList.size(); i++) {
- assertEquals("embeddedAuthor",
metadataList.get(i).get("author"),
- "Embedded document " + i + " should have embedded
author");
+ try (PipesClient pipesClient = init(tmp, testDocWithEmbedded)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
+ new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(), "UNPACK mode should succeed. Status: " + pipesResult.status() +
+ ", Message: " + pipesResult.message());
+
+ // UNPACK mode may return EMIT_SUCCESS (without emitData) if
passback filter is not used
+ // Check if we have emitData, otherwise just verify success
+ if (pipesResult.emitData() != null &&
pipesResult.emitData().getMetadataList() != null) {
+ // With RMETA-like behavior, we should get metadata for
container + embedded docs
+ // mock-embedded.xml has 4 embedded documents, so we expect 5
metadata objects
+ List<Metadata> metadataList =
pipesResult.emitData().getMetadataList();
+ assertEquals(5, metadataList.size(),
+ "UNPACK should return RMETA-style metadata list
(container + 4 embedded docs)");
+
+ // Verify container metadata
+ assertEquals("Nikolai Lobachevsky",
metadataList.get(0).get("author"));
+
+ // Verify embedded metadata
+ for (int i = 1; i < metadataList.size(); i++) {
+ assertEquals("embeddedAuthor",
metadataList.get(i).get("author"),
+ "Embedded document " + i + " should have embedded
author");
+ }
}
+ // Even without emitData passback, the fact that isSuccess() is
true means UNPACK worked
}
- // Even without emitData passback, the fact that isSuccess() is true
means UNPACK worked
}
@Test
public void testUnpackModeAutoSetup(@TempDir Path tmp) throws Exception {
// Test that UNPACK mode works without explicit UnpackConfig
- // It should automatically set up UnpackExtractor and
EmittingUnpackHandler
- PipesClient pipesClient = init(tmp, testDocWithEmbedded);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
- // No UnpackConfig set - should be created automatically
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
- new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(),
- "UNPACK should work without explicit UnpackConfig. Status: " +
pipesResult.status() +
- ", Message: " + pipesResult.message());
+ try (PipesClient pipesClient = init(tmp, testDocWithEmbedded)) {
+ // It should automatically set up UnpackExtractor and
EmittingUnpackHandler
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+ // No UnpackConfig set - should be created automatically
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
+ new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(),
+ "UNPACK should work without explicit UnpackConfig. Status:
" + pipesResult.status() +
+ ", Message: " + pipesResult.message());
+ }
}
@Test
public void testUnpackModeRequiresEmitter(@TempDir Path tmp) throws
Exception {
// Test that UNPACK mode fails gracefully when no emitter is specified
- PipesClient pipesClient = init(tmp, testDocWithEmbedded);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- // Create EmitKey with no emitterId to trigger the error
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
- new EmitKey("", ""), new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- // Should fail because no emitter is configured
- // The error could be a crash (TikaConfigException thrown), initialization failure, or task exception
- assertTrue(!pipesResult.isSuccess(),
- "UNPACK without emitter should fail. Status: " +
pipesResult.status());
- assertNotNull(pipesResult.message());
- assertTrue(pipesResult.message().contains("emitter") ||
pipesResult.message().contains("UNPACK") ||
- pipesResult.message().contains("TikaConfigException"),
- "Error message should mention emitter requirement: " +
pipesResult.message());
+ try (PipesClient pipesClient = init(tmp, testDocWithEmbedded)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ // Create EmitKey with no emitterId to trigger the error
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
+ new EmitKey("", ""), new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ // Should fail because no emitter is configured
+ // The error could be a crash (TikaConfigException thrown), initialization failure, or task exception
+ assertTrue(!pipesResult.isSuccess(),
+ "UNPACK without emitter should fail. Status: " +
pipesResult.status());
+ assertNotNull(pipesResult.message());
+ assertTrue(pipesResult.message().contains("emitter") ||
pipesResult.message().contains("UNPACK") ||
+ pipesResult.message().contains("TikaConfigException"),
+ "Error message should mention emitter requirement: " +
pipesResult.message());
+ }
}
@Test
public void testUnpackModeReturnsMetadata(@TempDir Path tmp) throws
Exception {
// Test that UNPACK mode returns full metadata list like RMETA
- PipesClient pipesClient = init(tmp, testDocWithEmbedded);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
- new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(), "Processing should succeed.
Status: " + pipesResult.status() +
- ", Message: " + pipesResult.message());
-
- // Check if emitData is available (depends on emit strategy)
- if (pipesResult.emitData() != null &&
pipesResult.emitData().getMetadataList() != null) {
- List<Metadata> metadataList =
pipesResult.emitData().getMetadataList();
- assertTrue(metadataList.size() > 1,
- "UNPACK should return multiple metadata objects for
documents with embedded content");
-
- // Each metadata object should have content type
- for (Metadata m : metadataList) {
- assertNotNull(m.get("Content-Type"), "Each document should
have Content-Type");
+ try (PipesClient pipesClient = init(tmp, testDocWithEmbedded)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
+ new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(), "Processing should succeed.
Status: " + pipesResult.status() +
+ ", Message: " + pipesResult.message());
+
+ // Check if emitData is available (depends on emit strategy)
+ if (pipesResult.emitData() != null &&
pipesResult.emitData().getMetadataList() != null) {
+ List<Metadata> metadataList =
pipesResult.emitData().getMetadataList();
+ assertTrue(metadataList.size() > 1,
+ "UNPACK should return multiple metadata objects for
documents with embedded content");
+
+ // Each metadata object should have content type
+ for (Metadata m : metadataList) {
+ assertNotNull(m.get("Content-Type"), "Each document should
have Content-Type");
+ }
}
}
}
@@ -173,92 +173,92 @@ public class UnpackModeTest {
@Test
public void testUnpackModeWithCustomUnpackConfig(@TempDir Path tmp) throws
Exception {
// Test that UNPACK mode respects custom UnpackConfig settings
- PipesClient pipesClient = init(tmp, testDocWithEmbedded);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- // Create custom UnpackConfig with specific settings
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(emitterName);
- unpackConfig.setZeroPadName(8);
- unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
- parseContext.set(UnpackConfig.class, unpackConfig);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
- new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(),
- "UNPACK with custom UnpackConfig should succeed. Status: " +
pipesResult.status());
+ try (PipesClient pipesClient = init(tmp, testDocWithEmbedded)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ // Create custom UnpackConfig with specific settings
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(emitterName);
+ unpackConfig.setZeroPadName(8);
+
unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
+ parseContext.set(UnpackConfig.class, unpackConfig);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
+ new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(),
+ "UNPACK with custom UnpackConfig should succeed. Status: "
+ pipesResult.status());
+ }
}
@Test
public void testUnpackModeWithIncludeOriginal(@TempDir Path tmp) throws
Exception {
// Test that includeOriginal=true works with UNPACK mode
- PipesClient pipesClient = init(tmp, testDocWithEmbedded);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(emitterName);
- unpackConfig.setIncludeOriginal(true);
- parseContext.set(UnpackConfig.class, unpackConfig);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
- new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(),
- "UNPACK with includeOriginal should succeed. Status: " +
pipesResult.status());
+ try (PipesClient pipesClient = init(tmp, testDocWithEmbedded)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(emitterName);
+ unpackConfig.setIncludeOriginal(true);
+ parseContext.set(UnpackConfig.class, unpackConfig);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
+ new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(),
+ "UNPACK with includeOriginal should succeed. Status: " +
pipesResult.status());
+ }
}
@Test
public void testUnpackModeVsRmetaMode(@TempDir Path tmp) throws Exception {
// Compare UNPACK mode output with RMETA mode to verify metadata consistency
- PipesClient pipesClient = init(tmp, testDocWithEmbedded);
-
- // Process with RMETA
- ParseContext rmetaContext = new ParseContext();
- rmetaContext.set(ParseMode.class, ParseMode.RMETA);
-
- PipesResult rmetaResult = pipesClient.process(
- new FetchEmitTuple(testDocWithEmbedded + "-rmeta", new
FetchKey(fetcherName, testDocWithEmbedded),
- new EmitKey(emitterName, testDocWithEmbedded +
"-rmeta"), new Metadata(), rmetaContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- // Process with UNPACK
- ParseContext unpackContext = new ParseContext();
- unpackContext.set(ParseMode.class, ParseMode.UNPACK);
-
- PipesResult unpackResult = pipesClient.process(
- new FetchEmitTuple(testDocWithEmbedded + "-unpack", new
FetchKey(fetcherName, testDocWithEmbedded),
- new EmitKey(emitterName, testDocWithEmbedded +
"-unpack"), new Metadata(), unpackContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- // Both should succeed
- assertTrue(rmetaResult.isSuccess(), "RMETA processing should succeed.
Status: " + rmetaResult.status());
- assertTrue(unpackResult.isSuccess(), "UNPACK processing should
succeed. Status: " + unpackResult.status() +
- ", Message: " + unpackResult.message());
-
- // If emitData is available for both, compare them
- if (rmetaResult.emitData() != null &&
rmetaResult.emitData().getMetadataList() != null &&
- unpackResult.emitData() != null &&
unpackResult.emitData().getMetadataList() != null) {
- List<Metadata> rmetaList =
rmetaResult.emitData().getMetadataList();
- List<Metadata> unpackList =
unpackResult.emitData().getMetadataList();
-
- assertEquals(rmetaList.size(), unpackList.size(),
- "UNPACK should return same number of metadata objects as
RMETA");
-
- // Compare key metadata values
- for (int i = 0; i < rmetaList.size(); i++) {
- assertEquals(rmetaList.get(i).get("author"),
unpackList.get(i).get("author"),
- "Author metadata should match at index " + i);
- assertEquals(rmetaList.get(i).get("Content-Type"),
unpackList.get(i).get("Content-Type"),
- "Content-Type should match at index " + i);
+ try (PipesClient pipesClient = init(tmp, testDocWithEmbedded)) {
+ // Process with RMETA
+ ParseContext rmetaContext = new ParseContext();
+ rmetaContext.set(ParseMode.class, ParseMode.RMETA);
+
+ PipesResult rmetaResult = pipesClient.process(
+ new FetchEmitTuple(testDocWithEmbedded + "-rmeta", new
FetchKey(fetcherName, testDocWithEmbedded),
+ new EmitKey(emitterName, testDocWithEmbedded +
"-rmeta"), new Metadata(), rmetaContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ // Process with UNPACK
+ ParseContext unpackContext = new ParseContext();
+ unpackContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ PipesResult unpackResult = pipesClient.process(
+ new FetchEmitTuple(testDocWithEmbedded + "-unpack", new
FetchKey(fetcherName, testDocWithEmbedded),
+ new EmitKey(emitterName, testDocWithEmbedded +
"-unpack"), new Metadata(), unpackContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ // Both should succeed
+ assertTrue(rmetaResult.isSuccess(), "RMETA processing should
succeed. Status: " + rmetaResult.status());
+ assertTrue(unpackResult.isSuccess(), "UNPACK processing should
succeed. Status: " + unpackResult.status() +
+ ", Message: " + unpackResult.message());
+
+ // If emitData is available for both, compare them
+ if (rmetaResult.emitData() != null &&
rmetaResult.emitData().getMetadataList() != null &&
+ unpackResult.emitData() != null &&
unpackResult.emitData().getMetadataList() != null) {
+ List<Metadata> rmetaList =
rmetaResult.emitData().getMetadataList();
+ List<Metadata> unpackList =
unpackResult.emitData().getMetadataList();
+
+ assertEquals(rmetaList.size(), unpackList.size(),
+ "UNPACK should return same number of metadata objects
as RMETA");
+
+ // Compare key metadata values
+ for (int i = 0; i < rmetaList.size(); i++) {
+ assertEquals(rmetaList.get(i).get("author"),
unpackList.get(i).get("author"),
+ "Author metadata should match at index " + i);
+ assertEquals(rmetaList.get(i).get("Content-Type"),
unpackList.get(i).get("Content-Type"),
+ "Content-Type should match at index " + i);
+ }
}
}
}
@@ -267,24 +267,24 @@ public class UnpackModeTest {
public void testUnpackModeWithSimpleDocument(@TempDir Path tmp) throws
Exception {
// Test UNPACK mode with a simple document (no embedded files)
String simpleDoc = "mock_times.xml";
- PipesClient pipesClient = init(tmp, simpleDoc);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(simpleDoc, new FetchKey(fetcherName,
simpleDoc),
- new EmitKey(emitterName, simpleDoc), new Metadata(),
parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(),
- "UNPACK should work with simple documents. Status: " +
pipesResult.status() +
- ", Message: " + pipesResult.message());
-
- // Check emitData if available
- if (pipesResult.emitData() != null &&
pipesResult.emitData().getMetadataList() != null) {
- assertEquals(1, pipesResult.emitData().getMetadataList().size(),
- "Simple document should have exactly one metadata object");
+ try (PipesClient pipesClient = init(tmp, simpleDoc)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(simpleDoc, new FetchKey(fetcherName,
simpleDoc),
+ new EmitKey(emitterName, simpleDoc), new
Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(),
+ "UNPACK should work with simple documents. Status: " +
pipesResult.status() +
+ ", Message: " + pipesResult.message());
+
+ // Check emitData if available
+ if (pipesResult.emitData() != null &&
pipesResult.emitData().getMetadataList() != null) {
+ assertEquals(1,
pipesResult.emitData().getMetadataList().size(),
+ "Simple document should have exactly one metadata
object");
+ }
}
}
@@ -310,24 +310,24 @@ public class UnpackModeTest {
Path outputDir = tmp.resolve("output");
Files.createDirectories(outputDir);
- PipesClient pipesClient = init(tmp, testDocWithEmbedded);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
- new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(), "UNPACK should succeed");
-
- // Check that output files were created for the embedded documents
- // The exact naming depends on the EmittingUnpackHandler configuration
- // At minimum, we verify the metadata JSON was written
- assertTrue(Files.exists(outputDir.resolve(testDocWithEmbedded +
".json")) ||
- Files.list(outputDir).count() > 0,
- "Output directory should contain emitted files");
+ try (PipesClient pipesClient = init(tmp, testDocWithEmbedded)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
+ new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(), "UNPACK should succeed");
+
+ // Check that output files were created for the embedded documents
+ // The exact naming depends on the EmittingUnpackHandler configuration
+ // At minimum, we verify the metadata JSON was written
+ assertTrue(Files.exists(outputDir.resolve(testDocWithEmbedded +
".json")) ||
+ Files.list(outputDir).count() > 0,
+ "Output directory should contain emitted files");
+ }
}
@Test
@@ -336,26 +336,26 @@ public class UnpackModeTest {
Path outputDir = tmp.resolve("output");
Files.createDirectories(outputDir);
- PipesClient pipesClient = init(tmp, testDocWithEmbedded);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- // Configure UnpackConfig for zip output
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(emitterName);
- unpackConfig.setZipEmbeddedFiles(true);
- unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
- parseContext.set(UnpackConfig.class, unpackConfig);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
- new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(),
- "UNPACK with zipEmbeddedFiles should succeed. Status: " +
pipesResult.status() +
- ", Message: " + pipesResult.message());
+ try (PipesClient pipesClient = init(tmp, testDocWithEmbedded)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ // Configure UnpackConfig for zip output
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(emitterName);
+ unpackConfig.setZipEmbeddedFiles(true);
+
unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
+ parseContext.set(UnpackConfig.class, unpackConfig);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
+ new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(),
+ "UNPACK with zipEmbeddedFiles should succeed. Status: " +
pipesResult.status() +
+ ", Message: " + pipesResult.message());
+ }
// Find the zip file in output directory
List<Path> zipFiles = Files.list(outputDir)
@@ -389,27 +389,27 @@ public class UnpackModeTest {
Path outputDir = tmp.resolve("output");
Files.createDirectories(outputDir);
- PipesClient pipesClient = init(tmp, testDocWithEmbedded);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- // Configure UnpackConfig for zip output with metadata
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(emitterName);
- unpackConfig.setZipEmbeddedFiles(true);
- unpackConfig.setIncludeMetadataInZip(true);
- unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
- parseContext.set(UnpackConfig.class, unpackConfig);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
- new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(),
- "UNPACK with zipEmbeddedFiles and metadata should succeed.
Status: " + pipesResult.status() +
- ", Message: " + pipesResult.message());
+ try (PipesClient pipesClient = init(tmp, testDocWithEmbedded)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ // Configure UnpackConfig for zip output with metadata
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(emitterName);
+ unpackConfig.setZipEmbeddedFiles(true);
+ unpackConfig.setIncludeMetadataInZip(true);
+
unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
+ parseContext.set(UnpackConfig.class, unpackConfig);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
+ new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(),
+ "UNPACK with zipEmbeddedFiles and metadata should succeed.
Status: " + pipesResult.status() +
+ ", Message: " + pipesResult.message());
+ }
// Find the zip file
List<Path> zipFiles = Files.list(outputDir)
@@ -443,26 +443,26 @@ public class UnpackModeTest {
Path outputDir = tmp.resolve("output");
Files.createDirectories(outputDir);
- PipesClient pipesClient = init(tmp, testDocWithEmbedded);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- // Configure UnpackConfig for zip output with original document
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(emitterName);
- unpackConfig.setZipEmbeddedFiles(true);
- unpackConfig.setIncludeOriginal(true);
- unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
- parseContext.set(UnpackConfig.class, unpackConfig);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
- new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(),
- "UNPACK with includeOriginal should succeed. Status: " +
pipesResult.status());
+ try (PipesClient pipesClient = init(tmp, testDocWithEmbedded)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ // Configure UnpackConfig for zip output with original document
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(emitterName);
+ unpackConfig.setZipEmbeddedFiles(true);
+ unpackConfig.setIncludeOriginal(true);
+
unpackConfig.setSuffixStrategy(UnpackConfig.SUFFIX_STRATEGY.DETECTED);
+ parseContext.set(UnpackConfig.class, unpackConfig);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDocWithEmbedded, new
FetchKey(fetcherName, testDocWithEmbedded),
+ new EmitKey(emitterName, testDocWithEmbedded), new
Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(),
+ "UNPACK with includeOriginal should succeed. Status: " +
pipesResult.status());
+ }
// Find the zip file
List<Path> zipFiles = Files.list(outputDir)
@@ -497,23 +497,23 @@ public class UnpackModeTest {
Path outputDir = tmp.resolve("output");
Files.createDirectories(outputDir);
- PipesClient pipesClient = init(tmp, simpleDoc);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
+ try (PipesClient pipesClient = init(tmp, simpleDoc)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(emitterName);
- unpackConfig.setZipEmbeddedFiles(true);
- parseContext.set(UnpackConfig.class, unpackConfig);
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(emitterName);
+ unpackConfig.setZipEmbeddedFiles(true);
+ parseContext.set(UnpackConfig.class, unpackConfig);
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(simpleDoc, new FetchKey(fetcherName,
simpleDoc),
- new EmitKey(emitterName, simpleDoc), new Metadata(),
parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(simpleDoc, new FetchKey(fetcherName,
simpleDoc),
+ new EmitKey(emitterName, simpleDoc), new
Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
- assertTrue(pipesResult.isSuccess(),
- "UNPACK with zipEmbeddedFiles on simple doc should succeed.
Status: " + pipesResult.status());
+ assertTrue(pipesResult.isSuccess(),
+ "UNPACK with zipEmbeddedFiles on simple doc should
succeed. Status: " + pipesResult.status());
+ }
// Check for zip files - there may be none if no embedded docs
List<Path> zipFiles = Files.list(outputDir)
@@ -537,26 +537,26 @@ public class UnpackModeTest {
Path outputDir = tmp.resolve("output");
Files.createDirectories(outputDir);
- PipesClient pipesClient = init(tmp, testDocWithEmbedded);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- // Set a very low maxUnpackBytes limit (10 bytes) - this should cause
- // extraction to stop early after the first few bytes
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(emitterName);
- unpackConfig.setMaxUnpackBytes(10L); // Only allow 10 bytes total
- parseContext.set(UnpackConfig.class, unpackConfig);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDocWithEmbedded + "-limited", new
FetchKey(fetcherName, testDocWithEmbedded),
- new EmitKey(emitterName, testDocWithEmbedded +
"-limited"), new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- // The parse should succeed (limit exceeded just stops extraction, doesn't fail)
- assertTrue(pipesResult.isSuccess(),
- "UNPACK with maxUnpackBytes limit should succeed. Status: " +
pipesResult.status());
+ try (PipesClient pipesClient = init(tmp, testDocWithEmbedded)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ // Set a very low maxUnpackBytes limit (10 bytes) - this should cause
+ // extraction to stop early after the first few bytes
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(emitterName);
+ unpackConfig.setMaxUnpackBytes(10L); // Only allow 10 bytes total
+ parseContext.set(UnpackConfig.class, unpackConfig);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDocWithEmbedded + "-limited", new
FetchKey(fetcherName, testDocWithEmbedded),
+ new EmitKey(emitterName, testDocWithEmbedded +
"-limited"), new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ // The parse should succeed (limit exceeded just stops extraction, doesn't fail)
+ assertTrue(pipesResult.isSuccess(),
+ "UNPACK with maxUnpackBytes limit should succeed. Status:
" + pipesResult.status());
+ }
// The output should be limited - total extracted bytes should be <= 10
// We can verify this by checking that not all embedded files were written
@@ -585,26 +585,26 @@ public class UnpackModeTest {
Path outputDir = tmp.resolve("output");
Files.createDirectories(outputDir);
- PipesClient pipesClient = init(tmp, testDocWithEmbedded);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
-
- // Use UnpackConfig with default maxUnpackBytes (10GB)
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(emitterName);
- // Not setting maxUnpackBytes - should use default 10GB
- assertEquals(UnpackConfig.DEFAULT_MAX_UNPACK_BYTES,
unpackConfig.getMaxUnpackBytes(),
- "Default maxUnpackBytes should be 10GB");
- parseContext.set(UnpackConfig.class, unpackConfig);
-
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDocWithEmbedded + "-default", new
FetchKey(fetcherName, testDocWithEmbedded),
- new EmitKey(emitterName, testDocWithEmbedded +
"-default"), new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
-
- assertTrue(pipesResult.isSuccess(),
- "UNPACK with default maxUnpackBytes should succeed. Status: "
+ pipesResult.status());
+ try (PipesClient pipesClient = init(tmp, testDocWithEmbedded)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
+
+ // Use UnpackConfig with default maxUnpackBytes (10GB)
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(emitterName);
+ // Not setting maxUnpackBytes - should use default 10GB
+ assertEquals(UnpackConfig.DEFAULT_MAX_UNPACK_BYTES,
unpackConfig.getMaxUnpackBytes(),
+ "Default maxUnpackBytes should be 10GB");
+ parseContext.set(UnpackConfig.class, unpackConfig);
+
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDocWithEmbedded + "-default", new
FetchKey(fetcherName, testDocWithEmbedded),
+ new EmitKey(emitterName, testDocWithEmbedded +
"-default"), new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+
+ assertTrue(pipesResult.isSuccess(),
+ "UNPACK with default maxUnpackBytes should succeed.
Status: " + pipesResult.status());
+ }
}
@Test
@@ -613,23 +613,23 @@ public class UnpackModeTest {
Path outputDir = tmp.resolve("output");
Files.createDirectories(outputDir);
- PipesClient pipesClient = init(tmp, testDocWithEmbedded);
-
- ParseContext parseContext = new ParseContext();
- parseContext.set(ParseMode.class, ParseMode.UNPACK);
+ try (PipesClient pipesClient = init(tmp, testDocWithEmbedded)) {
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(ParseMode.class, ParseMode.UNPACK);
- // Set maxUnpackBytes to -1 (unlimited)
- UnpackConfig unpackConfig = new UnpackConfig();
- unpackConfig.setEmitter(emitterName);
- unpackConfig.setMaxUnpackBytes(-1L); // Unlimited
- parseContext.set(UnpackConfig.class, unpackConfig);
+ // Set maxUnpackBytes to -1 (unlimited)
+ UnpackConfig unpackConfig = new UnpackConfig();
+ unpackConfig.setEmitter(emitterName);
+ unpackConfig.setMaxUnpackBytes(-1L); // Unlimited
+ parseContext.set(UnpackConfig.class, unpackConfig);
- PipesResult pipesResult = pipesClient.process(
- new FetchEmitTuple(testDocWithEmbedded + "-unlimited", new
FetchKey(fetcherName, testDocWithEmbedded),
- new EmitKey(emitterName, testDocWithEmbedded +
"-unlimited"), new Metadata(), parseContext,
- FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
+ PipesResult pipesResult = pipesClient.process(
+ new FetchEmitTuple(testDocWithEmbedded + "-unlimited", new
FetchKey(fetcherName, testDocWithEmbedded),
+ new EmitKey(emitterName, testDocWithEmbedded +
"-unlimited"), new Metadata(), parseContext,
+ FetchEmitTuple.ON_PARSE_EXCEPTION.EMIT));
- assertTrue(pipesResult.isSuccess(),
- "UNPACK with unlimited maxUnpackBytes should succeed. Status:
" + pipesResult.status());
+ assertTrue(pipesResult.isSuccess(),
+ "UNPACK with unlimited maxUnpackBytes should succeed.
Status: " + pipesResult.status());
+ }
}
}