This is an automated email from the ASF dual-hosted git repository. tallison pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/tika.git
commit d0f76be3a5c1f835ec49a5ce33e44acd7e9a2e6b Merge: d7781ecf33 bd1c7edc5f Author: tallison <[email protected]> AuthorDate: Fri Jan 23 17:10:15 2026 -0500 Merge remote-tracking branch 'origin/main' docs/antora-playbook.yml | 66 +++++ docs/antora.yml | 24 ++ docs/assets/logos/asf-tika-logos.zip | Bin 0 -> 446228 bytes .../ROOT/examples/migration-full-example.json | 1 + docs/modules/ROOT/examples/pdf-parser-basic.json | 1 + docs/modules/ROOT/examples/pdf-parser-full.json | 1 + docs/modules/ROOT/examples/tesseract-basic.json | 1 + docs/modules/ROOT/examples/tesseract-full.json | 1 + docs/modules/ROOT/nav.adoc | 41 ++++ .../ROOT/pages/advanced/embedded-documents.adoc | 252 +++++++++++++++++++ .../ROOT/pages}/advanced/index.adoc | 5 +- docs/modules/ROOT/pages/advanced/robustness.adoc | 137 +++++++++++ docs/modules/ROOT/pages/advanced/spooling.adoc | 229 +++++++++++++++++ .../ROOT/pages/configuration}/index.adoc | 24 +- .../pages/configuration/parsers/pdf-parser.adoc | 43 ++++ .../parsers/tesseract-ocr-parser.adoc | 67 +++++ .../index.adoc => modules/ROOT/pages/faq.adoc} | 17 +- docs/modules/ROOT/pages/index.adoc | 43 ++++ .../ROOT/pages/maintainers}/index.adoc | 19 +- .../pages/maintainers/release-guides/docker.adoc | 133 ++++++++++ .../pages/maintainers/release-guides/grpc.adoc} | 21 +- .../pages/maintainers/release-guides/helm.adoc | 138 +++++++++++ .../pages/maintainers/release-guides}/index.adoc | 21 +- .../pages/maintainers/release-guides/tika.adoc | 271 +++++++++++++++++++++ docs/modules/ROOT/pages/maintainers/site.adoc | 172 +++++++++++++ .../pages/migration-to-4x/design-notes-4x.adoc | 127 ++++++++++ docs/modules/ROOT/pages/migration-to-4x/index.adoc | 33 +++ .../pages/migration-to-4x/metadata-changes-4x.adoc | 121 +++++++++ .../migration-to-4x}/migrating-tika-server-4x.adoc | 2 +- .../pages/migration-to-4x/migrating-to-4x.adoc | 157 ++++++++++++ .../pages/migration-to-4x/serialization-4x.adoc | 101 ++++++++ .../ROOT/pages/pipes}/index.adoc | 24 +- docs/modules/ROOT/pages/roadmap.adoc | 96 ++++++++ .../ROOT/pages/security.adoc} | 23 +- docs/modules/ROOT/pages/using-tika/cli/index.adoc | 134 ++++++++++ .../ROOT/pages/using-tika/grpc}/index.adoc | 19 +- .../ROOT/pages}/using-tika/index.adoc | 18 +- .../pages/using-tika/java-api/getting-started.adoc | 130 ++++++++++ .../ROOT/pages/using-tika/java-api/index.adoc | 179 ++++++++++++++ .../ROOT/pages/using-tika/server}/index.adoc | 29 ++- docs/pom.xml | 30 +++ docs/src/main/asciidoc/advanced/index.adoc | 7 +- docs/src/main/asciidoc/using-tika/index.adoc | 6 + .../main/asciidoc/using-tika/java-api/index.adoc | 7 +- docs/supplemental-ui/css/search.css | 82 +++++++ docs/supplemental-ui/img/ASF_Tika-colour.png | Bin 0 -> 30720 bytes docs/supplemental-ui/img/ASF_Tika-colour.svg | 109 +++++++++ docs/supplemental-ui/js/search.js | 119 +++++++++ docs/supplemental-ui/partials/footer-content.hbs | 3 + docs/supplemental-ui/partials/footer-scripts.hbs | 1 + docs/supplemental-ui/partials/head-scripts.hbs | 1 + docs/supplemental-ui/partials/header-content.hbs | 29 +++ pom.xml | 3 + .../ParsingEmbeddedDocumentExtractor.java | 14 ++ .../java/org/apache/tika/io/FilenameUtils.java | 14 +- .../main/java/org/apache/tika/metadata/PST.java | 1 - .../java/org/apache/tika/parser/ParseRecord.java | 120 +++++++++ .../apache/tika/parser/RecursiveParserWrapper.java | 14 ++ .../sax/AbstractRecursiveParserWrapperHandler.java | 2 + .../tika/parser/microsoft/libpst/EmailVisitor.java | 9 +- .../microsoft/ooxml/AbstractOOXMLExtractor.java | 7 +- .../parser/microsoft/pst/OutlookPSTParser.java | 7 +- .../parser/microsoft/libpst/TestLibPstParser.java | 11 +- .../ooxml/OOXMLContainerExtractionTest.java | 2 +- .../parser/microsoft/ooxml/OOXMLParserTest.java | 4 + .../parser/microsoft/pst/OutlookPSTParserTest.java | 4 +- .../org/apache/tika/parser/wacz/WACZParser.java | 1 + .../tika/parser/RecursiveParserWrapperTest.java | 2 +- .../org/apache/tika/parser/pkg/ZipParserTest.java | 20 +- .../tika/pipes/api/emitter/AbstractEmitter.java | 7 +- .../pipes/api/emitter/AbstractStreamEmitter.java | 7 +- .../apache/tika/pipes/api/emitter/EmitData.java | 6 +- tika-pipes/tika-pipes-core/pom.xml | 4 + .../org/apache/tika/pipes/core/PipesClient.java | 46 ++-- .../tika/pipes/core/emitter/EmitDataImpl.java | 32 +-- .../core/serialization/EmitDataDeserializer.java | 75 ++++++ .../core/serialization/EmitDataSerializer.java | 45 ++++ .../pipes/core/serialization/JsonPipesIpc.java | 88 +++++++ .../serialization/PipesResultDeserializer.java | 65 +++++ .../core/serialization/PipesResultSerializer.java | 46 ++++ .../tika/pipes/core/server/ParseHandler.java | 32 ++- .../apache/tika/pipes/core/server/PipesServer.java | 49 ++-- .../apache/tika/pipes/core/MockPassbackFilter.java | 52 ++++ .../apache/tika/pipes/core/PassbackFilterTest.java | 24 +- .../apache/tika/pipes/core/PipesClientTest.java | 24 +- .../tika/pipes/emitter/jdbc/JDBCEmitter.java | 6 +- tika-serialization/pom.xml | 5 + .../apache/tika/config/loader/FrameworkConfig.java | 11 +- .../config/loader/TikaObjectMapperFactory.java | 18 +- .../org/apache/tika/serialization/TikaModule.java | 8 +- .../serdes/ParseContextDeserializer.java | 15 +- .../serdes/ParseContextSerializer.java | 7 +- .../apache/tika/serialization/SmileFormatTest.java | 110 +++++++++ 93 files changed, 4060 insertions(+), 272 deletions(-)
