This is an automated email from the ASF dual-hosted git repository.

tallison pushed a change to branch TIKA-4665-inference-module
in repository https://gitbox.apache.org/repos/asf/tika.git


    from bde2445734 TIKA-4665 - add OpenAI-based inference handling with chunking
     add 8918c66384 TIKA-4664 - add Poppler renderer, replace MuPDF, add OCR safety limits (#2612)
     add 2c98c63677 TIKA-4666 - add VLM parsers (Claude, Gemini, OpenAI) (#2614)
     add 314a6e7fea TIKA-4668 -- modernize versioning with $revision (#2616)
     add f09f148c6b merge from main and fix conflicts

No new revisions were added by this update.

Summary of changes:
 .gitignore                                         |   1 +
 docs/modules/ROOT/examples/claude-vlm-basic.json   |  10 +
 docs/modules/ROOT/examples/claude-vlm-full.json    |  18 +
 docs/modules/ROOT/examples/gemini-vlm-basic.json   |  10 +
 docs/modules/ROOT/examples/gemini-vlm-full.json    |  18 +
 docs/modules/ROOT/examples/openai-vlm-basic.json   |  11 +
 docs/modules/ROOT/examples/openai-vlm-full.json    |  18 +
 docs/modules/ROOT/examples/vlm-pdf-parsing.json    |  16 +
 docs/modules/ROOT/nav.adoc                         |   2 +
 docs/modules/ROOT/pages/advanced/index.adoc        |   2 +
 .../ROOT/pages/advanced/local-vlm-server.adoc      | 445 ++++++++++++++++++++
 .../pages/configuration/parsers/vlm-parsers.adoc   | 236 +++++++++++
 docs/pom.xml                                       |   2 +-
 pom.xml                                            |   2 +-
 tika-annotation-processor/pom.xml                  |   2 +-
 tika-app/pom.xml                                   |   2 +-
 tika-bom/pom.xml                                   | 142 +++----
 tika-bundles/pom.xml                               |   4 +-
 tika-bundles/tika-bundle-standard/pom.xml          |   4 +-
 tika-core/pom.xml                                  |   2 +-
 tika-detectors/pom.xml                             |   2 +-
 tika-detectors/tika-detector-magika/pom.xml        |   2 +-
 tika-detectors/tika-detector-siegfried/pom.xml     |   2 +-
 tika-e2e-tests/pom.xml                             |  32 +-
 tika-e2e-tests/tika-grpc/pom.xml                   |   2 +-
 tika-eval/pom.xml                                  |   2 +-
 tika-eval/tika-eval-app/pom.xml                    |   2 +-
 tika-eval/tika-eval-core/pom.xml                   |   2 +-
 tika-example/pom.xml                               |   2 +-
 tika-grpc/pom.xml                                  |   2 +-
 tika-handlers/pom.xml                              |   2 +-
 tika-handlers/tika-handler-boilerpipe/pom.xml      |   2 +-
 tika-integration-tests/pom.xml                     |   2 +-
 .../tika-pipes-kafka-integration-tests/pom.xml     |   2 +-
 .../pom.xml                                        |   2 +-
 .../tika-pipes-s3-integration-tests/pom.xml        |   2 +-
 .../tika-pipes-solr-integration-tests/pom.xml      |   2 +-
 .../tika-resource-loading-tests/pom.xml            |   2 +-
 tika-integration-tests/tika-woodstox-tests/pom.xml |   2 +-
 tika-java7/pom.xml                                 |   2 +-
 tika-langdetect/pom.xml                            |   2 +-
 tika-langdetect/tika-langdetect-lingo24/pom.xml    |   2 +-
 tika-langdetect/tika-langdetect-mitll-text/pom.xml |   2 +-
 tika-langdetect/tika-langdetect-opennlp/pom.xml    |   2 +-
 tika-langdetect/tika-langdetect-optimaize/pom.xml  |   2 +-
 .../tika-langdetect-test-commons/pom.xml           |   2 +-
 tika-langdetect/tika-langdetect-tika/pom.xml       |   2 +-
 tika-parent/pom.xml                                |  31 +-
 tika-parsers/pom.xml                               |   2 +-
 tika-parsers/tika-parsers-extended/pom.xml         |   2 +-
 .../tika-parser-scientific-module/pom.xml          |   2 +-
 .../tika-parser-scientific-package/pom.xml         |   2 +-
 .../tika-parser-sqlite3-module/pom.xml             |   2 +-
 .../tika-parser-sqlite3-package/pom.xml            |   2 +-
 .../pom.xml                                        |   2 +-
 tika-parsers/tika-parsers-ml/pom.xml               |   3 +-
 .../tika-parsers-ml/tika-parser-nlp-module/pom.xml |   2 +-
 .../tika-parser-nlp-package/pom.xml                |   2 +-
 .../pom.xml                                        |  24 +-
 .../apache/tika/parser/vlm/AbstractVLMParser.java  | 464 +++++++++++++++++++++
 .../apache/tika/parser/vlm/ClaudeVLMParser.java    | 227 ++++++++++
 .../apache/tika/parser/vlm/GeminiVLMParser.java    | 238 +++++++++++
 .../tika/parser/vlm/MarkdownToXHTMLEmitter.java    | 409 ++++++++++++++++++
 .../apache/tika/parser/vlm/OpenAIVLMParser.java    | 266 ++++++++++++
 .../org/apache/tika/parser/vlm/VLMOCRConfig.java   | 307 ++++++++++++++
 .../tika/parser/vlm/ClaudeVLMParserTest.java       | 285 +++++++++++++
 .../tika/parser/vlm/GeminiVLMParserTest.java       | 260 ++++++++++++
 .../parser/vlm/MarkdownToXHTMLEmitterTest.java     | 253 +++++++++++
 .../tika/parser/vlm/OpenAIVLMParserTest.java       | 291 +++++++++++++
 .../tika-parsers-ml/tika-transcribe-aws/pom.xml    |   2 +-
 tika-parsers/tika-parsers-standard/pom.xml         |   2 +-
 .../tika-parsers-standard-modules/pom.xml          |   2 +-
 .../tika-parser-apple-module/pom.xml               |   2 +-
 .../tika-parser-audiovideo-module/pom.xml          |   2 +-
 .../tika-parser-cad-module/pom.xml                 |   2 +-
 .../tika-parser-code-module/pom.xml                |   2 +-
 .../tika-parser-crypto-module/pom.xml              |   2 +-
 .../tika-parser-digest-commons/pom.xml             |   2 +-
 .../tika-parser-font-module/pom.xml                |   2 +-
 .../tika-parser-html-module/pom.xml                |   2 +-
 .../tika-parser-image-module/pom.xml               |   2 +-
 .../tika-parser-jdbc-commons/pom.xml               |   2 +-
 .../tika-parser-mail-commons/pom.xml               |   2 +-
 .../tika-parser-mail-module/pom.xml                |   2 +-
 .../tika-parser-microsoft-module/pom.xml           |   2 +-
 .../tika-parser-miscoffice-module/pom.xml          |   2 +-
 .../tika-parser-news-module/pom.xml                |   2 +-
 .../tika-parser-ocr-module/pom.xml                 |   2 +-
 .../tika-parser-pdf-module/pom.xml                 |   2 +-
 .../apache/tika/parser/pdf/AbstractPDF2XHTML.java  |  27 ++
 .../java/org/apache/tika/parser/pdf/OcrConfig.java |  60 +++
 .../apache/tika/parser/pdf/PDFParserConfig.java    |  33 ++
 .../tika/renderer/pdf/mutool/MuPDFRenderer.java    | 150 -------
 .../tika/renderer/pdf/poppler/PopplerRenderer.java | 293 +++++++++++++
 .../renderer/pdf/poppler/PopplerRendererTest.java  | 167 ++++++++
 .../tika-parser-pkg-module/pom.xml                 |   2 +-
 .../tika-parser-text-module/pom.xml                |   2 +-
 .../tika-parser-webarchive-module/pom.xml          |   2 +-
 .../tika-parser-xml-module/pom.xml                 |   2 +-
 .../tika-parser-xmp-commons/pom.xml                |   2 +-
 .../tika-parser-zip-commons/pom.xml                |   2 +-
 .../tika-parsers-standard-package/pom.xml          |   2 +-
 .../org/apache/tika/parser/pdf/PDFParserTest.java  |  21 +-
 ...fig.json => tika-rendering-poppler-config.json} |   2 +-
 tika-pipes/pom.xml                                 |   2 +-
 tika-pipes/tika-async-cli/pom.xml                  |   2 +-
 tika-pipes/tika-httpclient-commons/pom.xml         |   2 +-
 tika-pipes/tika-pipes-api/pom.xml                  |   2 +-
 tika-pipes/tika-pipes-config-store-ignite/pom.xml  |   2 +-
 tika-pipes/tika-pipes-core/pom.xml                 |   2 +-
 tika-pipes/tika-pipes-fork-parser/pom.xml          |   2 +-
 tika-pipes/tika-pipes-integration-tests/pom.xml    |   2 +-
 tika-pipes/tika-pipes-iterator-commons/pom.xml     |   2 +-
 tika-pipes/tika-pipes-plugins/pom.xml              |   2 +-
 .../tika-pipes-atlassian-jwt/pom.xml               |   2 +-
 .../tika-pipes-plugins/tika-pipes-az-blob/pom.xml  |   2 +-
 .../tika-pipes-plugins/tika-pipes-csv/pom.xml      |   2 +-
 .../tika-pipes-file-system/pom.xml                 |   2 +-
 .../tika-pipes-plugins/tika-pipes-gcs/pom.xml      |   2 +-
 .../tika-pipes-google-drive/pom.xml                |   2 +-
 .../tika-pipes-plugins/tika-pipes-http/pom.xml     |   2 +-
 .../tika-pipes-plugins/tika-pipes-jdbc/pom.xml     |   2 +-
 .../tika-pipes-plugins/tika-pipes-json/pom.xml     |   2 +-
 .../tika-pipes-plugins/tika-pipes-kafka/pom.xml    |   2 +-
 .../tika-pipes-microsoft-graph/pom.xml             |   2 +-
 .../tika-pipes-opensearch/pom.xml                  |   2 +-
 .../tika-pipes-plugins/tika-pipes-s3/pom.xml       |   2 +-
 .../tika-pipes-plugins/tika-pipes-solr/pom.xml     |   2 +-
 tika-pipes/tika-pipes-reporter-commons/pom.xml     |   2 +-
 tika-plugins-core/pom.xml                          |   2 +-
 tika-serialization/pom.xml                         |   2 +-
 tika-server/pom.xml                                |   2 +-
 tika-server/tika-server-client/pom.xml             |   2 +-
 tika-server/tika-server-core/pom.xml               |   2 +-
 tika-server/tika-server-standard/pom.xml           |   2 +-
 tika-translate/pom.xml                             |   2 +-
 tika-xmp/pom.xml                                   |   2 +-
 137 files changed, 4634 insertions(+), 346 deletions(-)
 create mode 100644 docs/modules/ROOT/examples/claude-vlm-basic.json
 create mode 100644 docs/modules/ROOT/examples/claude-vlm-full.json
 create mode 100644 docs/modules/ROOT/examples/gemini-vlm-basic.json
 create mode 100644 docs/modules/ROOT/examples/gemini-vlm-full.json
 create mode 100644 docs/modules/ROOT/examples/openai-vlm-basic.json
 create mode 100644 docs/modules/ROOT/examples/openai-vlm-full.json
 create mode 100644 docs/modules/ROOT/examples/vlm-pdf-parsing.json
 create mode 100644 docs/modules/ROOT/pages/advanced/local-vlm-server.adoc
 create mode 100644 docs/modules/ROOT/pages/configuration/parsers/vlm-parsers.adoc
 copy tika-parsers/tika-parsers-ml/{tika-inference => tika-parser-vlm-ocr-module}/pom.xml (81%)
 create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/main/java/org/apache/tika/parser/vlm/AbstractVLMParser.java
 create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/main/java/org/apache/tika/parser/vlm/ClaudeVLMParser.java
 create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/main/java/org/apache/tika/parser/vlm/GeminiVLMParser.java
 create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/main/java/org/apache/tika/parser/vlm/MarkdownToXHTMLEmitter.java
 create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/main/java/org/apache/tika/parser/vlm/OpenAIVLMParser.java
 create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/main/java/org/apache/tika/parser/vlm/VLMOCRConfig.java
 create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/test/java/org/apache/tika/parser/vlm/ClaudeVLMParserTest.java
 create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/test/java/org/apache/tika/parser/vlm/GeminiVLMParserTest.java
 create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/test/java/org/apache/tika/parser/vlm/MarkdownToXHTMLEmitterTest.java
 create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/test/java/org/apache/tika/parser/vlm/OpenAIVLMParserTest.java
 delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/mutool/MuPDFRenderer.java
 create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/poppler/PopplerRenderer.java
 create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/renderer/pdf/poppler/PopplerRendererTest.java
 rename tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/{tika-rendering-mupdf-config.json => tika-rendering-poppler-config.json} (85%)

Reply via email to