This is an automated email from the ASF dual-hosted git repository.
tallison pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tika.git
The following commit(s) were added to refs/heads/main by this push:
new 45a9d21bac TIKA-4669 -- improve serdes (#2617)
45a9d21bac is described below
commit 45a9d21bac63d9aac24aec3990dfc5e9f58cd197
Author: Tim Allison <[email protected]>
AuthorDate: Wed Feb 18 16:25:05 2026 -0500
TIKA-4669 -- improve serdes (#2617)
---
.../java/org/apache/tika/parser/ParseContext.java | 15 ++-
.../org/apache/tika/parser/ParseContextTest.java | 104 +++++++++++++++++++++
.../serialization/FetchEmitTupleDeserializer.java | 8 --
3 files changed, 116 insertions(+), 11 deletions(-)
diff --git a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
index 4a3bafa91b..8d0094de11 100644
--- a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
@@ -239,9 +239,18 @@ public class ParseContext implements Serializable {
}
// Copy typed objects
context.putAll(source.context);
- // Copy JSON configs
- jsonConfigs.putAll(source.jsonConfigs);
- // Copy resolved configs (if any)
+ // Copy JSON configs, invalidating stale resolved state for overridden keys.
+ // When a source jsonConfig overrides an existing entry, the previously resolved
+ // object is stale and must be cleared so resolveAll() will re-resolve from the
+ // new JSON config.
+ for (Map.Entry<String, JsonConfig> entry : source.jsonConfigs.entrySet()) {
+ String key = entry.getKey();
+ jsonConfigs.put(key, entry.getValue());
+ if (resolvedConfigs != null) {
+ resolvedConfigs.remove(key);
+ }
+ }
+ // Copy resolved configs from source (if any)
if (source.resolvedConfigs != null && !source.resolvedConfigs.isEmpty()) {
if (resolvedConfigs == null) {
resolvedConfigs = new HashMap<>();
diff --git a/tika-core/src/test/java/org/apache/tika/parser/ParseContextTest.java b/tika-core/src/test/java/org/apache/tika/parser/ParseContextTest.java
new file mode 100644
index 0000000000..59f24ee73b
--- /dev/null
+++ b/tika-core/src/test/java/org/apache/tika/parser/ParseContextTest.java
@@ -0,0 +1,104 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.tika.parser;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertNull;
+
+import org.junit.jupiter.api.Test;
+
+public class ParseContextTest {
+
+ @Test
+ public void testCopyFromInvalidatesStaleResolvedConfigs() {
+ // Simulate a "default" context that has already resolved a config
+ ParseContext defaults = new ParseContext();
+ defaults.setJsonConfig("my-component", "{\"value\":\"default\"}");
+ Object defaultResolved = new Object();
+ defaults.setResolvedConfig("my-component", defaultResolved);
+
+ // Simulate a request context that overrides only the jsonConfig (no resolvedConfig)
+ ParseContext request = new ParseContext();
+ request.setJsonConfig("my-component", "{\"value\":\"override\"}");
+
+ // Merge: defaults + request overlay
+ defaults.copyFrom(request);
+
+ // The stale resolvedConfig must be cleared so resolveAll() will re-resolve
+ assertNull(defaults.getResolvedConfig("my-component"),
+ "copyFrom must clear stale resolvedConfig when jsonConfig is overridden");
+
+ // The jsonConfig should be the override
+ assertNotNull(defaults.getJsonConfigs().get("my-component"));
+ assertEquals("{\"value\":\"override\"}",
+ defaults.getJsonConfigs().get("my-component").json());
+ }
+
+ @Test
+ public void testCopyFromPreservesResolvedConfigsForUnrelatedKeys() {
+ ParseContext defaults = new ParseContext();
+ defaults.setJsonConfig("component-a", "{\"a\":true}");
+ Object resolvedA = new Object();
+ defaults.setResolvedConfig("component-a", resolvedA);
+
+ // Request overrides a DIFFERENT key
+ ParseContext request = new ParseContext();
+ request.setJsonConfig("component-b", "{\"b\":true}");
+
+ defaults.copyFrom(request);
+
+ // component-a's resolvedConfig should be untouched
+ assertEquals(resolvedA, defaults.getResolvedConfig("component-a"),
+ "copyFrom must not clear resolvedConfigs for keys not overridden by source");
+ }
+
+ @Test
+ public void testCopyFromWithSourceResolvedConfigOverrides() {
+ ParseContext defaults = new ParseContext();
+ defaults.setJsonConfig("my-component", "{\"value\":\"default\"}");
+ Object defaultResolved = new Object();
+ defaults.setResolvedConfig("my-component", defaultResolved);
+
+ // Source has both jsonConfig AND resolvedConfig (e.g., already resolved upstream)
+ ParseContext source = new ParseContext();
+ source.setJsonConfig("my-component", "{\"value\":\"override\"}");
+ Object sourceResolved = new Object();
+ source.setResolvedConfig("my-component", sourceResolved);
+
+ defaults.copyFrom(source);
+
+ // Source's resolvedConfig should win
+ assertEquals(sourceResolved, defaults.getResolvedConfig("my-component"),
+ "copyFrom should use source's resolvedConfig when source has one");
+ }
+
+ @Test
+ public void testCopyFromEmptySourcePreservesDefaults() {
+ ParseContext defaults = new ParseContext();
+ defaults.setJsonConfig("my-component", "{\"value\":\"default\"}");
+ Object defaultResolved = new Object();
+ defaults.setResolvedConfig("my-component", defaultResolved);
+
+ ParseContext emptySource = new ParseContext();
+ defaults.copyFrom(emptySource);
+
+ // Empty source should not disturb existing state
+ assertEquals(defaultResolved, defaults.getResolvedConfig("my-component"),
+ "copyFrom with empty source must preserve existing resolvedConfigs");
+ }
+}
diff --git a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/FetchEmitTupleDeserializer.java b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/FetchEmitTupleDeserializer.java
index 7af419c09d..c842d60aa6 100644
--- a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/FetchEmitTupleDeserializer.java
+++ b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/serialization/FetchEmitTupleDeserializer.java
@@ -38,13 +38,11 @@ import com.fasterxml.jackson.databind.JsonDeserializer;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
-import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.pipes.api.FetchEmitTuple;
import org.apache.tika.pipes.api.emitter.EmitKey;
import org.apache.tika.pipes.api.fetcher.FetchKey;
-import org.apache.tika.serialization.ParseContextUtils;
import org.apache.tika.serialization.serdes.ParseContextDeserializer;
public class FetchEmitTupleDeserializer extends JsonDeserializer<FetchEmitTuple> {
@@ -64,12 +62,6 @@ public class FetchEmitTupleDeserializer extends JsonDeserializer<FetchEmitTuple>
Metadata metadata = readMetadata(root);
JsonNode parseContextNode = root.get(PARSE_CONTEXT);
ParseContext parseContext = parseContextNode == null ? new ParseContext() : ParseContextDeserializer.readParseContext(parseContextNode, mapper);
- // Resolve all friendly-named components from jsonConfigs to actual objects
- try {
- ParseContextUtils.resolveAll(parseContext, FetchEmitTupleDeserializer.class.getClassLoader());
- } catch (TikaConfigException e) {
- throw new IOException("Failed to resolve parse-context components", e);
- }
FetchEmitTuple.ON_PARSE_EXCEPTION onParseException = readOnParseException(root);
return new FetchEmitTuple(id, new FetchKey(fetcherId, fetchKey,
fetchRangeStart, fetchRangeEnd),