This is an automated email from the ASF dual-hosted git repository.
tallison pushed a change to branch TIKA-4635
in repository https://gitbox.apache.org/repos/asf/tika.git
from d85014a145 TIKA-4635 -- refactor DigesterFactory to be standalone
new 068976320e TIKA-4635 -- update MetadataResource
add 766cf2cd51 TIKA-4634 -- refactor metadata write filter/limiter (#2554)
new d94b8700c9 Merge remote-tracking branch 'origin/main' into TIKA-4635
The 2 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
docs/modules/ROOT/nav.adoc | 1 +
docs/modules/ROOT/pages/advanced/index.adoc | 1 +
.../ROOT/pages/advanced/setting-limits.adoc | 229 +++++++++++++++++++++
.../src/main/java/org/apache/tika/cli/TikaCLI.java | 17 +-
.../src/main/java/org/apache/tika/gui/TikaGUI.java | 7 +-
tika-core/src/main/java/org/apache/tika/Tika.java | 67 ++++--
.../tika/extractor/ParserContainerExtractor.java | 4 +-
.../java/org/apache/tika/metadata/Metadata.java | 44 ++--
...aWriteFilter.java => MetadataWriteLimiter.java} | 21 +-
...ctory.java => MetadataWriteLimiterFactory.java} | 19 +-
...iteFilter.java => StandardMetadataLimiter.java} | 94 +++------
.../StandardMetadataLimiterFactory.java | 152 ++++++++++++++
.../writefilter/StandardWriteFilterFactory.java | 127 ------------
.../org/apache/tika/parser/AutoDetectParser.java | 5 -
.../apache/tika/parser/AutoDetectParserConfig.java | 15 +-
.../java/org/apache/tika/parser/ParseContext.java | 27 +++
.../java/org/apache/tika/parser/ParsingReader.java | 21 +-
.../apache/tika/parser/journal/TEIDOMParser.java | 2 +-
.../tika/parser/apple/AppleSingleFileParser.java | 2 +-
.../org/apache/tika/parser/apple/PListParser.java | 9 +-
.../parser/iwork/iwana/IWork13PackageParser.java | 4 +-
.../executable/UniversalExecutableParser.java | 2 +-
.../org/apache/tika/parser/crypto/Pkcs7Parser.java | 2 +-
.../org/apache/tika/parser/crypto/TSDParser.java | 2 +-
.../org/apache/tika/parser/html/HtmlHandler.java | 8 +-
.../apache/tika/parser/jdbc/JDBCTableReader.java | 4 +-
.../tika/parser/mail/MailContentHandler.java | 4 +-
.../org/apache/tika/parser/mbox/MboxParser.java | 2 +-
.../parser/microsoft/AbstractPOIFSExtractor.java | 4 +-
.../apache/tika/parser/microsoft/EMFParser.java | 4 +-
.../tika/parser/microsoft/HSLFExtractor.java | 4 +-
.../tika/parser/microsoft/JackcessExtractor.java | 2 +-
.../apache/tika/parser/microsoft/OfficeParser.java | 14 +-
.../tika/parser/microsoft/OutlookExtractor.java | 12 +-
.../apache/tika/parser/microsoft/TNEFParser.java | 2 +-
.../microsoft/activemime/ActiveMimeParser.java | 2 +-
.../tika/parser/microsoft/chm/ChmParser.java | 2 +-
.../tika/parser/microsoft/libpst/EmailVisitor.java | 6 +-
.../microsoft/onenote/OneNoteTreeWalker.java | 7 +-
.../microsoft/ooxml/AbstractOOXMLExtractor.java | 8 +-
.../ooxml/XWPFWordExtractorDecorator.java | 5 +-
.../microsoft/ooxml/xps/XPSExtractorDecorator.java | 2 +-
.../microsoft/ooxml/xps/XPSPageContentHandler.java | 8 +-
.../ooxml/xwpf/ml2006/BinaryDataHandler.java | 2 +-
.../parser/microsoft/pst/OutlookPSTParser.java | 10 +-
.../parser/microsoft/pst/PSTMailItemParser.java | 6 +-
.../parser/microsoft/rtf/RTFEmbObjHandler.java | 8 +-
.../tika/parser/microsoft/xml/WordMLParser.java | 2 +-
.../org/apache/tika/parser/epub/EpubParser.java | 2 +-
.../apache/tika/parser/indesign/IDMLParser.java | 6 +-
.../parser/odf/FlatOpenDocumentMacroHandler.java | 2 +-
.../tika/parser/odf/OpenDocumentBodyHandler.java | 2 +-
.../apache/tika/parser/odf/OpenDocumentParser.java | 4 +-
.../apache/tika/parser/pdf/AbstractPDF2XHTML.java | 12 +-
.../java/org/apache/tika/parser/pdf/PDF2XHTML.java | 2 +-
.../java/org/apache/tika/parser/pdf/PDFParser.java | 2 +-
.../tika/parser/pdf/image/ImageGraphicsEngine.java | 2 +-
.../tika/renderer/pdf/mutool/MuPDFRenderer.java | 2 +-
.../tika/renderer/pdf/pdfbox/PDFBoxRenderer.java | 2 +-
.../apache/tika/parser/pkg/CompressorParser.java | 2 +-
.../org/apache/tika/parser/pkg/PackageParser.java | 7 +-
.../java/org/apache/tika/parser/pkg/RarParser.java | 2 +-
.../org/apache/tika/parser/pkg/UnrarParser.java | 2 +-
.../org/apache/tika/parser/http/HttpParser.java | 2 +-
.../org/apache/tika/parser/wacz/WACZParser.java | 18 +-
.../org/apache/tika/parser/warc/WARCParser.java | 2 +-
.../apache/tika/parser/xml/FictionBookParser.java | 2 +-
.../tika/parser/AutoDetectParserConfigTest.java | 20 +-
...a-config-upcasing-custom-handler-decorator.json | 13 --
.../configs/tika-config-write-filter.json | 16 +-
.../apache/tika/pipes/core/server/PipesServer.java | 12 +-
.../apache/tika/pipes/core/server/PipesWorker.java | 27 ++-
.../tika/pipes/core/MetadataWriteLimiterTest.java | 131 ++++++++++++
...runcate.json => tika-config-write-limiter.json} | 21 +-
.../apache/tika/config/loader/TikaJsonConfig.java | 1 +
.../org/apache/tika/config/loader/TikaLoader.java | 8 +
.../org/apache/tika/serialization/TikaModule.java | 4 +-
...rTest.java => StandardMetadataLimiterTest.java} | 74 ++++---
.../test/resources/configs/TIKA-3695-exclude.json | 8 +-
.../test/resources/configs/TIKA-3695-fields.json | 12 +-
.../src/test/resources/configs/TIKA-3695.json | 12 +-
.../server/core/resource/DetectorResource.java | 4 +-
.../server/core/resource/MetadataResource.java | 17 +-
.../server/core/resource/PipesParsingHelper.java | 3 +-
.../core/resource/RecursiveMetadataResource.java | 12 +-
.../tika/server/core/resource/TikaResource.java | 93 ++++++---
.../server/core/resource/UnpackerResource.java | 8 +-
.../standard/resource/XMPMetadataResource.java | 7 +-
88 files changed, 1056 insertions(+), 524 deletions(-)
create mode 100644 docs/modules/ROOT/pages/advanced/setting-limits.adoc
rename tika-core/src/main/java/org/apache/tika/metadata/writefilter/{MetadataWriteFilter.java => MetadataWriteLimiter.java} (76%)
rename tika-core/src/main/java/org/apache/tika/metadata/writefilter/{MetadataWriteFilterFactory.java => MetadataWriteLimiterFactory.java} (59%)
rename tika-core/src/main/java/org/apache/tika/metadata/writefilter/{StandardWriteFilter.java => StandardMetadataLimiter.java} (83%)
create mode 100644 tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardMetadataLimiterFactory.java
delete mode 100644 tika-core/src/main/java/org/apache/tika/metadata/writefilter/StandardWriteFilterFactory.java
create mode 100644 tika-pipes/tika-pipes-integration-tests/src/test/java/org/apache/tika/pipes/core/MetadataWriteLimiterTest.java
copy tika-pipes/tika-pipes-integration-tests/src/test/resources/configs/{tika-config-truncate.json => tika-config-write-limiter.json} (74%)
rename tika-serialization/src/test/java/org/apache/tika/metadata/writefilter/{StandardWriteFilterTest.java => StandardMetadataLimiterTest.java} (80%)