This is an automated email from the ASF dual-hosted git repository.

tallison pushed a commit to branch TIKA-4633-centralize-limits
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 887d4efa49d7f2b27d9401c464073d43b3e54c73
Merge: 14797b0851 bd1513677e
Author: tallison <[email protected]>
AuthorDate: Wed Jan 28 10:37:50 2026 -0500

    Merge remote-tracking branch 'origin/main' into TIKA-4633-centralize-limits
    
    # Conflicts:
    #       tika-core/src/main/java/org/apache/tika/parser/AutoDetectParserConfig.java
    #       tika-pipes/tika-pipes-core/src/main/java/org/apache/tika/pipes/core/server/PipesWorker.java

 .../ROOT/pages/configuration/digesters.adoc        | 192 +++++++++++++++++++++
 docs/modules/ROOT/pages/configuration/index.adoc   |   4 +
 .../src/main/java/org/apache/tika/cli/TikaCLI.java |  18 +-
 .../java/org/apache/tika/digest/DigestDef.java     |   2 +-
 .../java/org/apache/tika/digest/DigestHelper.java  |  41 +++--
 .../org/apache/tika/digest/DigesterFactory.java    |  39 +++--
 .../org/apache/tika/parser/AutoDetectParser.java   |   6 +-
 .../apache/tika/parser/AutoDetectParserConfig.java |  80 ---------
 .../java/org/apache/tika/parser/ParseContext.java  |  26 +++
 .../parser/digestutils/BouncyCastleDigester.java   |   4 +-
 .../digestutils/BouncyCastleDigesterFactory.java   |  27 ++-
 .../tika/parser/digestutils/CommonsDigester.java   |   4 +-
 .../parser/digestutils/CommonsDigesterFactory.java |  27 ++-
 .../tika/parser/AutoDetectParserConfigTest.java    |  24 ++-
 .../apache/tika/parser/AutoDetectParserTest.java   |   6 +-
 .../tika/parser/RecursiveParserWrapperTest.java    |   9 +-
 .../tika/parser/digest/DigestConfigTest.java       |  58 ++++---
 .../digest/SkipContainerDocumentDigestTest.java    |  92 ++++++----
 .../parser/microsoft/ooxml/OOXMLParserTest.java    |   6 +-
 .../src/test/resources/configs/tika-4533.json      |   6 +-
 .../configs/tika-config-bc-digests-base32.json     |   8 +-
 .../configs/tika-config-bc-digests-basic.json      |   8 +-
 .../configs/tika-config-bc-digests-multiple.json   |   8 +-
 .../configs/tika-config-commons-digests-basic.json |   8 +-
 .../configs/tika-config-digests-pdf-only.json      |   8 +-
 .../tika-config-digests-skip-container.json        |  14 +-
 .../resources/configs/tika-config-digests.json     |  10 +-
 .../resources/configs/tika-config-md5-digest.json  |   4 +-
 ...a-config-upcasing-custom-handler-decorator.json |   9 +-
 .../configs/tika-config-write-filter.json          |  13 +-
 .../apache/tika/pipes/core/server/EmitHandler.java |   9 +-
 .../tika/pipes/core/server/FetchHandler.java       |   5 +-
 .../tika/pipes/core/server/ParseHandler.java       |  10 +-
 .../apache/tika/pipes/core/server/PipesServer.java |  57 +++---
 .../apache/tika/pipes/core/server/PipesWorker.java |  21 ++-
 .../test/resources/configs/tika-config-basic.json  |   9 +-
 .../resources/configs/tika-config-passback.json    |   9 +-
 .../resources/configs/tika-config-truncate.json    |   9 +-
 .../resources/configs/tika-config-uppercasing.json |   9 +-
 .../configs/tika-config-write-limiter.json         |   1 -
 .../org/apache/tika/config/loader/TikaLoader.java  |  43 +++++
 .../server/core/resource/MetadataResource.java     |   2 +
 .../tika/server/core/resource/TikaResource.java    |  15 +-
 .../org/apache/tika/server/core/CXFTestBase.java   |   8 +-
 .../resources/configs/cxf-test-base-template.json  |  10 +-
 .../resources/configs/cxf-test-base-template.json  |  10 +-
 .../configs/tika-config-for-server-tests.json      |   8 +-
 .../tika-config-langdetect-opennlp-filter.json     |   8 +-
 .../tika-config-langdetect-optimaize-filter.json   |   8 +-
 49 files changed, 680 insertions(+), 332 deletions(-)

diff --cc tika-core/src/main/java/org/apache/tika/parser/AutoDetectParserConfig.java
index a8e30083fd,ebf359ff1c..2ce72443b8
--- a/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParserConfig.java
+++ b/tika-core/src/main/java/org/apache/tika/parser/AutoDetectParserConfig.java
@@@ -56,20 -74,59 +54,9 @@@ public class AutoDetectParserConfig imp
      private ContentHandlerDecoratorFactory contentHandlerDecoratorFactory =
              NOOP_CONTENT_HANDLER_DECORATOR_FACTORY;
  
-     private DigesterFactory digesterFactory = null;
- 
-     // Lazily built digester from the factory
-     private transient Digester digester = null;
- 
-     /**
-      * If true, skip digesting for container (top-level) documents.
-      * Only embedded documents will be digested.
-      */
-     private boolean skipContainerDocumentDigest = false;
- 
      private boolean throwOnZeroBytes = true;
  
 -    /**
 -     * Creates a SecureContentHandlerConfig using the passed in parameters.
 -     *
 -     * @param outputThreshold          SecureContentHandler - character output threshold.
 -     * @param maximumCompressionRatio  SecureContentHandler - max compression ratio allowed.
 -     * @param maximumDepth             SecureContentHandler - maximum XML element nesting level.
 -     * @param maximumPackageEntryDepth SecureContentHandler - maximum package entry nesting level.
 -     */
 -    public AutoDetectParserConfig(Long outputThreshold,
 -                                  Long maximumCompressionRatio, Integer maximumDepth,
 -                                  Integer maximumPackageEntryDepth) {
 -        this.outputThreshold = outputThreshold;
 -        this.maximumCompressionRatio = maximumCompressionRatio;
 -        this.maximumDepth = maximumDepth;
 -        this.maximumPackageEntryDepth = maximumPackageEntryDepth;
 -    }
 -
      public AutoDetectParserConfig() {
 -
 -    }
 -
 -    public Long getOutputThreshold() {
 -        return outputThreshold;
 -    }
 -
 -    public void setOutputThreshold(Long outputThreshold) {
 -        this.outputThreshold = outputThreshold;
 -    }
 -
 -    public Long getMaximumCompressionRatio() {
 -        return maximumCompressionRatio;
 -    }
 -
 -    public void setMaximumCompressionRatio(Long maximumCompressionRatio) {
 -        this.maximumCompressionRatio = maximumCompressionRatio;
 -    }
 -
 -    public Integer getMaximumDepth() {
 -        return maximumDepth;
 -    }
 -
 -    public void setMaximumDepth(Integer maximumDepth) {
 -        this.maximumDepth = maximumDepth;
 -    }
 -
 -    public Integer getMaximumPackageEntryDepth() {
 -        return maximumPackageEntryDepth;
 -    }
 -
 -    public void setMaximumPackageEntryDepth(Integer maximumPackageEntryDepth) {
 -        this.maximumPackageEntryDepth = maximumPackageEntryDepth;
      }
  
      public void setEmbeddedDocumentExtractorFactory(
@@@ -165,11 -157,12 +87,9 @@@
  
      @Override
      public String toString() {
 -        return "AutoDetectParserConfig{" + "outputThreshold=" +
 -                outputThreshold + ", maximumCompressionRatio=" + maximumCompressionRatio +
 -                ", maximumDepth=" + maximumDepth + ", maximumPackageEntryDepth=" +
 -                maximumPackageEntryDepth + ", embeddedDocumentExtractorFactory=" +
 -                embeddedDocumentExtractorFactory + ", contentHandlerDecoratorFactory=" +
 -                contentHandlerDecoratorFactory +
 +        return "AutoDetectParserConfig{" +
 +                "embeddedDocumentExtractorFactory=" + embeddedDocumentExtractorFactory +
 +                ", contentHandlerDecoratorFactory=" + contentHandlerDecoratorFactory +
-                 ", digesterFactory=" + digesterFactory +
-                 ", skipContainerDocumentDigest=" + skipContainerDocumentDigest +
                  ", throwOnZeroBytes=" + throwOnZeroBytes + '}';
      }
  }
diff --cc tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/MetadataResource.java
index ce9ba25026,7101105ba8..3a516d7751
--- a/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/MetadataResource.java
+++ b/tika-server/tika-server-core/src/main/java/org/apache/tika/server/core/resource/MetadataResource.java
@@@ -77,8 -77,9 +77,9 @@@ public class MetadataResource 
              @Context HttpHeaders httpHeaders,
              @Context UriInfo info) throws Exception {
  
+         // Load default context from config, then overlay with request config
          ParseContext context = TikaResource.createParseContext();
 -        Metadata metadata = context.newMetadata();
 +        Metadata metadata = Metadata.newInstance(context);
          try (TikaInputStream tis = setupMultipartConfig(attachments, metadata, context)) {
              // No need to parse embedded docs for metadata-only extraction
              context.set(DocumentSelector.class, metadata1 -> false);

Reply via email to