This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4635
in repository https://gitbox.apache.org/repos/asf/tika.git

commit d94b8700c94ad778656e898fc4a15184a503ca52
Merge: 068976320e 766cf2cd51
Author: tallison <[email protected]>
AuthorDate: Wed Jan 28 08:37:10 2026 -0500

    Merge remote-tracking branch 'origin/main' into TIKA-4635
    
    # Conflicts:
    #       tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
    #       tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-write-filter.json
    #       tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
    #       tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java
    #       tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/MetadataResource.java

 docs/modules/ROOT/nav.adoc                         |   1 +
 docs/modules/ROOT/pages/advanced/index.adoc        |   1 +
 .../ROOT/pages/advanced/setting-limits.adoc        | 229 +++++++++++++++++++++
 .../src/main/java/org/apache/tika/cli/TikaCLI.java |  17 +-
 .../src/main/java/org/apache/tika/gui/TikaGUI.java |   7 +-
 tika-core/src/main/java/org/apache/tika/Tika.java  |  67 ++++--
 .../tika/extractor/ParserContainerExtractor.java   |   4 +-
 .../java/org/apache/tika/metadata/Metadata.java    |  44 ++--
 ...aWriteFilter.java => MetadataWriteLimiter.java} |  21 +-
 ...ctory.java => MetadataWriteLimiterFactory.java} |  19 +-
 ...iteFilter.java => StandardMetadataLimiter.java} |  94 +++------
 .../StandardMetadataLimiterFactory.java            | 152 ++++++++++++++
 .../writefilter/StandardWriteFilterFactory.java    | 127 ------------
 .../org/apache/tika/parser/AutoDetectParser.java   |   5 -
 .../apache/tika/parser/AutoDetectParserConfig.java |  15 +-
 .../java/org/apache/tika/parser/ParseContext.java  |  27 +++
 .../java/org/apache/tika/parser/ParsingReader.java |  21 +-
 .../apache/tika/parser/journal/TEIDOMParser.java   |   2 +-
 .../tika/parser/apple/AppleSingleFileParser.java   |   2 +-
 .../org/apache/tika/parser/apple/PListParser.java  |   9 +-
 .../parser/iwork/iwana/IWork13PackageParser.java   |   4 +-
 .../executable/UniversalExecutableParser.java      |   2 +-
 .../org/apache/tika/parser/crypto/Pkcs7Parser.java |   2 +-
 .../org/apache/tika/parser/crypto/TSDParser.java   |   2 +-
 .../org/apache/tika/parser/html/HtmlHandler.java   |   8 +-
 .../apache/tika/parser/jdbc/JDBCTableReader.java   |   4 +-
 .../tika/parser/mail/MailContentHandler.java       |   4 +-
 .../org/apache/tika/parser/mbox/MboxParser.java    |   2 +-
 .../parser/microsoft/AbstractPOIFSExtractor.java   |   4 +-
 .../apache/tika/parser/microsoft/EMFParser.java    |   4 +-
 .../tika/parser/microsoft/HSLFExtractor.java       |   4 +-
 .../tika/parser/microsoft/JackcessExtractor.java   |   2 +-
 .../apache/tika/parser/microsoft/OfficeParser.java |  14 +-
 .../tika/parser/microsoft/OutlookExtractor.java    |  12 +-
 .../apache/tika/parser/microsoft/TNEFParser.java   |   2 +-
 .../microsoft/activemime/ActiveMimeParser.java     |   2 +-
 .../tika/parser/microsoft/chm/ChmParser.java       |   2 +-
 .../tika/parser/microsoft/libpst/EmailVisitor.java |   6 +-
 .../microsoft/onenote/OneNoteTreeWalker.java       |   7 +-
 .../microsoft/ooxml/AbstractOOXMLExtractor.java    |   8 +-
 .../ooxml/XWPFWordExtractorDecorator.java          |   5 +-
 .../microsoft/ooxml/xps/XPSExtractorDecorator.java |   2 +-
 .../microsoft/ooxml/xps/XPSPageContentHandler.java |   8 +-
 .../ooxml/xwpf/ml2006/BinaryDataHandler.java       |   2 +-
 .../parser/microsoft/pst/OutlookPSTParser.java     |  10 +-
 .../parser/microsoft/pst/PSTMailItemParser.java    |   6 +-
 .../parser/microsoft/rtf/RTFEmbObjHandler.java     |   8 +-
 .../tika/parser/microsoft/xml/WordMLParser.java    |   2 +-
 .../org/apache/tika/parser/epub/EpubParser.java    |   2 +-
 .../apache/tika/parser/indesign/IDMLParser.java    |   6 +-
 .../parser/odf/FlatOpenDocumentMacroHandler.java   |   2 +-
 .../tika/parser/odf/OpenDocumentBodyHandler.java   |   2 +-
 .../apache/tika/parser/odf/OpenDocumentParser.java |   4 +-
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java  |  12 +-
 .../java/org/apache/tika/parser/pdf/PDF2XHTML.java |   2 +-
 .../java/org/apache/tika/parser/pdf/PDFParser.java |   2 +-
 .../tika/parser/pdf/image/ImageGraphicsEngine.java |   2 +-
 .../tika/renderer/pdf/mutool/MuPDFRenderer.java    |   2 +-
 .../tika/renderer/pdf/pdfbox/PDFBoxRenderer.java   |   2 +-
 .../apache/tika/parser/pkg/CompressorParser.java   |   2 +-
 .../org/apache/tika/parser/pkg/PackageParser.java  |   7 +-
 .../java/org/apache/tika/parser/pkg/RarParser.java |   2 +-
 .../org/apache/tika/parser/pkg/UnrarParser.java    |   2 +-
 .../org/apache/tika/parser/http/HttpParser.java    |   2 +-
 .../org/apache/tika/parser/wacz/WACZParser.java    |  18 +-
 .../org/apache/tika/parser/warc/WARCParser.java    |   2 +-
 .../apache/tika/parser/xml/FictionBookParser.java  |   2 +-
 .../tika/parser/AutoDetectParserConfigTest.java    |  20 +-
 ...a-config-upcasing-custom-handler-decorator.json |  13 --
 .../configs/tika-config-write-filter.json          |  16 +-
 .../apache/tika/pipes/core/server/PipesServer.java |  12 +-
 .../apache/tika/pipes/core/server/PipesWorker.java |  27 ++-
 .../tika/pipes/core/MetadataWriteLimiterTest.java  | 131 ++++++++++++
 .../configs/tika-config-write-limiter.json         |  64 ++++++
 .../apache/tika/config/loader/TikaJsonConfig.java  |   1 +
 .../org/apache/tika/config/loader/TikaLoader.java  |   8 +
 .../org/apache/tika/serialization/TikaModule.java  |   4 +-
 ...rTest.java => StandardMetadataLimiterTest.java} |  74 ++++---
 .../test/resources/configs/TIKA-3695-exclude.json  |   8 +-
 .../test/resources/configs/TIKA-3695-fields.json   |  12 +-
 .../src/test/resources/configs/TIKA-3695.json      |  12 +-
 .../server/core/resource/DetectorResource.java     |   4 +-
 .../server/core/resource/MetadataResource.java     |  15 +-
 .../server/core/resource/PipesParsingHelper.java   |   3 +-
 .../core/resource/RecursiveMetadataResource.java   |  12 +-
 .../tika/server/core/resource/TikaResource.java    |  93 ++++++---
 .../server/core/resource/UnpackerResource.java     |   8 +-
 .../standard/resource/XMPMetadataResource.java     |   7 +-
 88 files changed, 1107 insertions(+), 514 deletions(-)

diff --cc tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
index ffb95dc609,351aa49a65..2d8e7ca21d
--- a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParser.java
@@@ -146,14 -146,11 +146,9 @@@ public class AutoDetectParser extends C
  
      public void parse(TikaInputStream tis, ContentHandler handler, Metadata 
metadata,
                        ParseContext context) throws IOException, SAXException, 
TikaException {
-         if (autoDetectParserConfig.getMetadataWriteFilterFactory() != null) {
-             metadata.setMetadataWriteFilter(
-                     
autoDetectParserConfig.getMetadataWriteFilterFactory().newInstance());
-         }
- 
          // Compute digests before type detection if configured
 -        DigestHelper.maybeDigest(tis,
 -                autoDetectParserConfig.digester(),
 -                autoDetectParserConfig.isSkipContainerDocumentDigest(),
 -                metadata, context);
 +        // DigesterFactory is retrieved from ParseContext (configured via 
other-configs)
 +        DigestHelper.maybeDigest(tis, metadata, context);
  
          // Automatically detect the MIME type of the document
          MediaType type = detector.detect(tis, metadata, context);
diff --cc 
tika-core/src/main/java/org/apache/tika/parser/AutoDetectParserConfig.java
index c5c6632c00,4fa1d3c083..ebf359ff1c
--- a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParserConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParserConfig.java
@@@ -21,9 -21,10 +21,8 @@@ import java.io.Serializable
  import org.xml.sax.ContentHandler;
  
  import org.apache.tika.config.TikaComponent;
 -import org.apache.tika.digest.Digester;
 -import org.apache.tika.digest.DigesterFactory;
  import org.apache.tika.extractor.EmbeddedDocumentExtractorFactory;
  import org.apache.tika.metadata.Metadata;
- import org.apache.tika.metadata.writefilter.MetadataWriteFilterFactory;
  import org.apache.tika.sax.ContentHandlerDecoratorFactory;
  
  /**
@@@ -172,10 -238,10 +160,9 @@@ public class AutoDetectParserConfig imp
          return "AutoDetectParserConfig{" + "outputThreshold=" +
                  outputThreshold + ", maximumCompressionRatio=" + 
maximumCompressionRatio +
                  ", maximumDepth=" + maximumDepth + ", 
maximumPackageEntryDepth=" +
-                 maximumPackageEntryDepth + ", metadataWriteFilterFactory=" +
-                 metadataWriteFilterFactory + ", 
embeddedDocumentExtractorFactory=" +
+                 maximumPackageEntryDepth + ", 
embeddedDocumentExtractorFactory=" +
                  embeddedDocumentExtractorFactory + ", 
contentHandlerDecoratorFactory=" +
 -                contentHandlerDecoratorFactory + ", digesterFactory=" + 
digesterFactory +
 -                ", skipContainerDocumentDigest=" + 
skipContainerDocumentDigest +
 +                contentHandlerDecoratorFactory +
                  ", throwOnZeroBytes=" + throwOnZeroBytes + '}';
      }
  }
diff --cc tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
index 78c8eb2e9a,ae1ecc3bbb..f5338594ff
--- a/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/ParseContext.java
@@@ -222,32 -224,31 +224,57 @@@ public class ParseContext implements Se
          return context.isEmpty() && jsonConfigs.isEmpty();
      }
  
 +    /**
 +     * Copies all entries from the source ParseContext into this one.
 +     * Existing entries in this context are overwritten by source entries.
 +     * <p>
 +     * This copies both typed objects (from context map) and JSON configs.
 +     *
 +     * @param source the ParseContext to copy from
 +     * @since Apache Tika 4.0
 +     */
 +    public void copyFrom(ParseContext source) {
 +        if (source == null) {
 +            return;
 +        }
 +        // Copy typed objects
 +        context.putAll(source.context);
 +        // Copy JSON configs
 +        jsonConfigs.putAll(source.jsonConfigs);
 +        // Copy resolved configs (if any)
 +        if (source.resolvedConfigs != null && 
!source.resolvedConfigs.isEmpty()) {
 +            if (resolvedConfigs == null) {
 +                resolvedConfigs = new HashMap<>();
 +            }
 +            resolvedConfigs.putAll(source.resolvedConfigs);
 +        }
 +    }
 +
+     /**
+      * Creates a new Metadata object with any configured limits applied.
+      * <p>
+      * If a {@link MetadataWriteLimiterFactory} is configured in this 
ParseContext, the returned
+      * Metadata will have a write limiter that enforces those limits. 
Otherwise,
+      * returns a plain Metadata object.
+      * <p>
+      * Parsers should use this method instead of {@code new Metadata()} when 
creating
+      * metadata for embedded documents, to ensure limits are applied at 
creation time
+      * rather than later during parsing.
+      * <p>
+      * Example usage:
+      * <pre>
+      * Metadata embeddedMetadata = context.newMetadata();
+      * embeddedMetadata.set(TikaCoreProperties.RESOURCE_NAME_KEY, name);
+      * // limits are already applied, no data bypasses the limiter
+      * </pre>
+      *
+      * @return a new Metadata object, with limits applied if configured
+      * @since Apache Tika 4.0
+      */
+     public Metadata newMetadata() {
+         MetadataWriteLimiterFactory factory = 
get(MetadataWriteLimiterFactory.class);
+         return factory != null ? new Metadata(factory.newInstance()) : new 
Metadata();
+     }
  
  
      /**
diff --cc 
tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-write-filter.json
index bb7acf39e9,48314a2ab4..6a1e6a925a
--- 
a/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-write-filter.json
+++ 
b/tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/tika-config-write-filter.json
@@@ -1,25 -1,25 +1,25 @@@
  {
    "auto-detect-parser": {
      "outputThreshold": 1000000,
-     "metadataWriteFilterFactory": {
-       "standard-write-filter-factory": {
-         "includeFields": [
-          "X-TIKA-CONTENT",
-           "dc:creator"
-         ]
-       }
-     },
 -    "skipContainerDocumentDigest": true,
 -    "digesterFactory": {
 +    "throwOnZeroBytes": false
 +  },
 +  "other-configs": {
 +    "digester-factory": {
        "commons-digester-factory": {
          "digests": [
            { "algorithm": "SHA256", "encoding": "BASE32" },
            { "algorithm": "MD5" }
 -        ]
 +        ],
 +        "skipContainerDocumentDigest": true
        }
+     },
 -    "throwOnZeroBytes": false
 -  },
 -  "other-configs": {
+     "metadata-write-limiter-factory": {
+       "standard-metadata-limiter-factory": {
+         "includeFields": [
+           "X-TIKA:content",
+           "dc:creator"
+         ]
+       }
      }
    }
  }
diff --cc 
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
index a91e66823a,8cf9308577..1b6897edbb
--- 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
+++ 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesServer.java
@@@ -58,8 -59,8 +58,9 @@@ import org.apache.tika.exception.TikaEx
  import org.apache.tika.extractor.RUnpackExtractorFactory;
  import org.apache.tika.metadata.Metadata;
  import org.apache.tika.metadata.filter.MetadataFilter;
+ import org.apache.tika.metadata.writefilter.MetadataWriteLimiterFactory;
  import org.apache.tika.parser.AutoDetectParser;
 +import org.apache.tika.parser.ParseContext;
  import org.apache.tika.parser.RecursiveParserWrapper;
  import org.apache.tika.pipes.api.FetchEmitTuple;
  import org.apache.tika.pipes.api.PipesResult;
@@@ -343,7 -346,7 +348,7 @@@ public class PipesServer implements Aut
          Long thresholdBytes = 
pipesConfig.getEmitStrategy().getThresholdBytes();
          long threshold = (thresholdBytes != null) ? thresholdBytes : 
EmitStrategyConfig.DEFAULT_DIRECT_EMIT_THRESHOLD_BYTES;
          EmitHandler emitHandler = new EmitHandler(defaultMetadataFilter, 
emitStrategy, emitterManager, threshold);
-         PipesWorker pipesWorker = new PipesWorker(fetchEmitTuple, 
mergedContext, autoDetectParser, emitterManager, fetchHandler, parseHandler, 
emitHandler);
 -        PipesWorker pipesWorker = new PipesWorker(fetchEmitTuple, 
autoDetectParser, emitterManager, fetchHandler, parseHandler, emitHandler, 
defaultMetadataWriteLimiterFactory);
++        PipesWorker pipesWorker = new PipesWorker(fetchEmitTuple, 
mergedContext, autoDetectParser, emitterManager, fetchHandler, parseHandler, 
emitHandler, defaultMetadataWriteLimiterFactory);
          return pipesWorker;
      }
  
@@@ -474,25 -491,9 +479,26 @@@
          }
          this.detector = this.autoDetectParser.getDetector();
          this.rMetaParser = new RecursiveParserWrapper(autoDetectParser);
+ 
      }
  
 +    /**
 +     * Creates a merged ParseContext with defaults from tika-config overlaid 
with request values.
 +     * Request values take precedence over defaults.
 +     * <p>
 +     * Creates a fresh context each time to avoid shared state between 
requests.
 +     *
 +     * @param requestContext the ParseContext from FetchEmitTuple
 +     * @return a new ParseContext with defaults + request overrides
 +     */
 +    private ParseContext createMergedParseContext(ParseContext 
requestContext) throws TikaConfigException {
 +        // Create fresh context with defaults from tika-config (e.g., 
DigesterFactory)
 +        ParseContext mergedContext = tikaLoader.loadParseContext();
 +        // Overlay request's values (request takes precedence)
 +        mergedContext.copyFrom(requestContext);
 +        return mergedContext;
 +    }
 +
      private ConfigStore createConfigStore(PipesConfig pipesConfig, 
TikaPluginManager tikaPluginManager) throws TikaException {
          String configStoreType = pipesConfig.getConfigStoreType();
          String configStoreParams = pipesConfig.getConfigStoreParams();
diff --cc 
tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java
index 57733d38d5,18b83192ac..df54ea0042
--- 
a/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java
+++ 
b/tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java
@@@ -57,12 -57,11 +58,13 @@@ class PipesWorker implements Callable<P
      private final FetchHandler fetchHandler;
      private final ParseHandler parseHandler;
      private final EmitHandler emitHandler;
+     private final MetadataWriteLimiterFactory 
defaultMetadataWriteLimiterFactory;
  
 -    public PipesWorker(FetchEmitTuple fetchEmitTuple, AutoDetectParser 
autoDetectParser, EmitterManager emitterManager, FetchHandler fetchHandler, 
ParseHandler parseHandler,
 +    public PipesWorker(FetchEmitTuple fetchEmitTuple, ParseContext 
parseContext, AutoDetectParser autoDetectParser,
 +                       EmitterManager emitterManager, FetchHandler 
fetchHandler, ParseHandler parseHandler,
-                        EmitHandler emitHandler) {
+                        EmitHandler emitHandler, MetadataWriteLimiterFactory 
defaultMetadataWriteLimiterFactory) {
          this.fetchEmitTuple = fetchEmitTuple;
 +        this.parseContext = parseContext;
          this.autoDetectParser = autoDetectParser;
          this.emitterManager = emitterManager;
          this.fetchHandler = fetchHandler;
@@@ -109,22 -109,22 +112,23 @@@
          //start a new metadata object to gather info from the fetch process
          //we want to isolate and not touch the metadata sent into the 
fetchEmitTuple
          //so that we can inject it after the filter at the very end
-         Metadata metadata = new Metadata();
-         FetchHandler.TisOrResult tisOrResult = 
fetchHandler.fetch(fetchEmitTuple, metadata, parseContext);
-         if (tisOrResult.pipesResult() != null) {
-             return new ParseDataOrPipesResult(null, 
tisOrResult.pipesResult());
-         }
- 
 -        ParseContext parseContext = null;
 +        ParseContext localContext = null;
          try {
 -            parseContext = setupParseContext(fetchEmitTuple);
 +            localContext = setupParseContext();
          } catch (IOException e) {
              LOG.warn("fetcher initialization exception id={}", 
fetchEmitTuple.getId(), e);
              return new ParseDataOrPipesResult(null,
                      new 
PipesResult(PipesResult.RESULT_STATUS.FETCHER_INITIALIZATION_EXCEPTION, 
ExceptionUtils.getStackTrace(e)));
          }
 -        Metadata metadata = parseContext.newMetadata();
 -        FetchHandler.TisOrResult tisOrResult = 
fetchHandler.fetch(fetchEmitTuple, metadata);
++        // Use newMetadata() to apply any configured write limits
++        Metadata metadata = localContext.newMetadata();
++        FetchHandler.TisOrResult tisOrResult = 
fetchHandler.fetch(fetchEmitTuple, metadata, localContext);
+         if (tisOrResult.pipesResult() != null) {
+             return new ParseDataOrPipesResult(null, 
tisOrResult.pipesResult());
+         }
+ 
          try (TikaInputStream tis = tisOrResult.tis()) {
 -            return parseHandler.parseWithStream(fetchEmitTuple, tis, 
metadata, parseContext);
 +            return parseHandler.parseWithStream(fetchEmitTuple, tis, 
metadata, localContext);
          } catch (SecurityException e) {
              LOG.error("security exception id={}", fetchEmitTuple.getId(), e);
              throw e;
@@@ -137,9 -137,18 +141,17 @@@
  
  
  
 -    private ParseContext setupParseContext(FetchEmitTuple fetchEmitTuple) 
throws TikaException, IOException {
 -        ParseContext parseContext = fetchEmitTuple.getParseContext();
 +    private ParseContext setupParseContext() throws TikaException, 
IOException {
          // ContentHandlerFactory and ParseMode are retrieved from 
ParseContext in ParseHandler.
          // They are set in ParseContext from PipesConfig loaded via 
TikaLoader at startup.
+ 
+         // If the parseContext from the FetchEmitTuple doesn't have a 
MetadataWriteLimiterFactory,
+         // use the default one loaded from config in PipesServer
+         MetadataWriteLimiterFactory existingFactory = 
parseContext.get(MetadataWriteLimiterFactory.class);
+         if (existingFactory == null && defaultMetadataWriteLimiterFactory != 
null) {
+             parseContext.set(MetadataWriteLimiterFactory.class, 
defaultMetadataWriteLimiterFactory);
+         }
+ 
          EmbeddedDocumentBytesConfig embeddedDocumentBytesConfig = 
parseContext.get(EmbeddedDocumentBytesConfig.class);
          if (embeddedDocumentBytesConfig == null) {
              //make sure there's one here -- or do we make this default in 
fetchemit tuple?
diff --cc 
tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
index 389f33697d,b527532e5b..55f6ff0993
--- 
a/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
+++ 
b/tika-serialization/src/main/java/org/apache/tika/config/loader/TikaLoader.java
@@@ -43,6 -42,6 +43,7 @@@ import org.apache.tika.language.transla
  import org.apache.tika.metadata.filter.CompositeMetadataFilter;
  import org.apache.tika.metadata.filter.MetadataFilter;
  import org.apache.tika.metadata.filter.NoOpFilter;
++import org.apache.tika.metadata.writefilter.MetadataWriteLimiterFactory;
  import org.apache.tika.mime.MediaTypeRegistry;
  import org.apache.tika.mime.MimeTypes;
  import org.apache.tika.parser.AutoDetectParser;
@@@ -378,39 -376,6 +379,46 @@@ public class TikaLoader 
          return autoDetectParser;
      }
  
 +    /**
 +     * Loads and returns a ParseContext populated with components from the 
"other-configs" section.
 +     * <p>
 +     * This method loads components that should be passed via ParseContext, 
such as:
 +     * <ul>
 +     *   <li>DigesterFactory (from "digester-factory")</li>
++     *   <li>MetadataWriteLimiterFactory (from 
"metadata-write-limiter-factory")</li>
 +     * </ul>
 +     * <p>
 +     * Use this method when you need a pre-configured ParseContext for 
parsing operations.
 +     *
 +     * <p>Example usage:
 +     * <pre>
 +     * TikaLoader loader = TikaLoader.load(configPath);
 +     * Parser parser = loader.loadAutoDetectParser();
 +     * ParseContext context = loader.loadParseContext();
 +     * parser.parse(stream, handler, metadata, context);
 +     * </pre>
 +     *
 +     * @return a ParseContext populated with configured components
 +     * @throws TikaConfigException if loading fails
 +     */
 +    public ParseContext loadParseContext() throws TikaConfigException {
 +        ParseContext context = new ParseContext();
 +
 +        // Load DigesterFactory from other-configs if present
 +        DigesterFactory digesterFactory = configs().load("digester-factory", 
DigesterFactory.class);
 +        if (digesterFactory != null) {
 +            context.set(DigesterFactory.class, digesterFactory);
 +        }
 +
++        // Load MetadataWriteLimiterFactory from other-configs if present
++        MetadataWriteLimiterFactory metadataWriteLimiterFactory = 
configs().load(MetadataWriteLimiterFactory.class);
++        if (metadataWriteLimiterFactory != null) {
++            context.set(MetadataWriteLimiterFactory.class, 
metadataWriteLimiterFactory);
++        }
++
 +        return context;
 +    }
 +
      /**
       * Returns a ConfigLoader for loading simple configuration objects.
       * <p>
diff --cc 
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/MetadataResource.java
index e3e4aebae6,c84676c14b..7101105ba8
--- 
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/MetadataResource.java
+++ 
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/MetadataResource.java
@@@ -76,9 -77,8 +77,9 @@@ public class MetadataResource 
              @Context HttpHeaders httpHeaders,
              @Context UriInfo info) throws Exception {
  
-         Metadata metadata = new Metadata();
 +        // Load default context from config, then overlay with request config
-         ParseContext context = 
TikaResource.getTikaLoader().loadParseContext();
+         ParseContext context = TikaResource.createParseContext();
+         Metadata metadata = context.newMetadata();
          try (TikaInputStream tis = setupMultipartConfig(attachments, 
metadata, context)) {
              // No need to parse embedded docs for metadata-only extraction
              context.set(DocumentSelector.class, metadata1 -> false);
@@@ -169,8 -171,7 +172,8 @@@
  
      protected Metadata parseMetadata(TikaInputStream tis, Metadata metadata, 
MultivaluedMap<String, String> httpHeaders, UriInfo info)
              throws IOException, TikaConfigException {
 +        // Load default context from config (includes DigesterFactory from 
other-configs)
-         final ParseContext context = 
TikaResource.getTikaLoader().loadParseContext();
+         final ParseContext context = TikaResource.createParseContext();
          Parser parser = TikaResource.createParser();
          fillMetadata(parser, metadata, httpHeaders);
          //no need to parse embedded docs
diff --cc 
tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
index 287cd95ced,968bd83f99..31a5817ad8
--- 
a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
+++ 
b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/TikaResource.java
@@@ -107,6 -114,21 +114,22 @@@ public class TikaResource 
          return PIPES_PARSING_HELPER;
      }
  
+     /**
 -     * Creates a new ParseContext with the default 
MetadataWriteLimiterFactory set if configured.
 -     * This should be used instead of {@code createParseContext()} to ensure 
metadata limits
 -     * are applied when configured.
++     * Creates a new ParseContext with defaults loaded from tika-config.
++     * This loads components from "other-configs" such as DigesterFactory and 
MetadataWriteLimiterFactory.
+      *
+      * @return a new ParseContext with defaults applied
+      */
+     public static ParseContext createParseContext() {
 -        ParseContext context = new ParseContext();
 -        if (DEFAULT_METADATA_WRITE_LIMITER_FACTORY != null) {
 -            context.set(MetadataWriteLimiterFactory.class, 
DEFAULT_METADATA_WRITE_LIMITER_FACTORY);
++        try {
++            return TIKA_LOADER.loadParseContext();
++        } catch (TikaConfigException e) {
++            // Fall back to empty context if loading fails
++            LOG.warn("Failed to load ParseContext from config, using empty 
context", e);
++            return new ParseContext();
+         }
 -        return context;
+     }
+ 
  
      @SuppressWarnings("serial")
      public static Parser createParser() throws TikaConfigException, 
IOException {

Reply via email to