This is an automated email from the ASF dual-hosted git repository.
tallison pushed a change to branch TIKA-4667-tess4j
in repository https://gitbox.apache.org/repos/asf/tika.git
discard 82712c4f91 Fix Tess4J CI failures: probe native lib at init, fix default assertion
discard 1e421831d4 TIKA-4667 - add Tess4J in-process OCR parser and docs
add 8918c66384 TIKA-4664 - add Poppler renderer, replace MuPDF, add OCR safety limits (#2612)
add 2c98c63677 TIKA-4666 - add VLM parsers (Claude, Gemini, OpenAI) (#2614)
add 0940496012 TIKA-4667 - add Tess4J in-process OCR parser and docs
add c57cd02b02 Fix Tess4J CI failures: probe native lib at init, fix default assertion
This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version. This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:
 * -- * -- B -- O -- O -- O   (82712c4f91)
            \
             N -- N -- N   refs/heads/TIKA-4667-tess4j (c57cd02b02)
You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.
Any revisions marked "omit" are not gone; other references still
refer to them. Any revisions marked "discard" are gone forever.
No new revisions were added by this update.
Summary of changes:
docs/modules/ROOT/examples/claude-vlm-basic.json | 10 +
docs/modules/ROOT/examples/claude-vlm-full.json | 18 +
docs/modules/ROOT/examples/gemini-vlm-basic.json | 10 +
docs/modules/ROOT/examples/gemini-vlm-full.json | 18 +
docs/modules/ROOT/examples/openai-vlm-basic.json | 11 +
docs/modules/ROOT/examples/openai-vlm-full.json | 18 +
docs/modules/ROOT/examples/vlm-pdf-parsing.json | 16 +
docs/modules/ROOT/nav.adoc | 2 +
docs/modules/ROOT/pages/advanced/index.adoc | 2 +
.../ROOT/pages/advanced/local-vlm-server.adoc | 445 ++++++++++++++++++++
.../pages/configuration/parsers/vlm-parsers.adoc | 236 +++++++++++
tika-parsers/tika-parsers-ml/pom.xml | 1 +
.../pom.xml | 68 ++-
.../apache/tika/parser/vlm/AbstractVLMParser.java | 464 +++++++++++++++++++++
.../apache/tika/parser/vlm/ClaudeVLMParser.java | 227 ++++++++++
.../apache/tika/parser/vlm/GeminiVLMParser.java | 238 +++++++++++
.../tika/parser/vlm/MarkdownToXHTMLEmitter.java | 409 ++++++++++++++++++
.../apache/tika/parser/vlm/OpenAIVLMParser.java | 266 ++++++++++++
.../org/apache/tika/parser/vlm/VLMOCRConfig.java | 307 ++++++++++++++
.../tika/parser/vlm/ClaudeVLMParserTest.java | 285 +++++++++++++
.../tika/parser/vlm/GeminiVLMParserTest.java | 260 ++++++++++++
.../parser/vlm/MarkdownToXHTMLEmitterTest.java | 253 +++++++++++
.../tika/parser/vlm/OpenAIVLMParserTest.java | 291 +++++++++++++
.../apache/tika/parser/pdf/AbstractPDF2XHTML.java | 27 ++
.../java/org/apache/tika/parser/pdf/OcrConfig.java | 60 +++
.../apache/tika/parser/pdf/PDFParserConfig.java | 33 ++
.../tika/renderer/pdf/mutool/MuPDFRenderer.java | 150 -------
.../tika/renderer/pdf/poppler/PopplerRenderer.java | 293 +++++++++++++
.../renderer/pdf/poppler/PopplerRendererTest.java | 167 ++++++++
.../org/apache/tika/parser/pdf/PDFParserTest.java | 21 +-
...fig.json => tika-rendering-poppler-config.json} | 2 +-
31 files changed, 4434 insertions(+), 174 deletions(-)
create mode 100644 docs/modules/ROOT/examples/claude-vlm-basic.json
create mode 100644 docs/modules/ROOT/examples/claude-vlm-full.json
create mode 100644 docs/modules/ROOT/examples/gemini-vlm-basic.json
create mode 100644 docs/modules/ROOT/examples/gemini-vlm-full.json
create mode 100644 docs/modules/ROOT/examples/openai-vlm-basic.json
create mode 100644 docs/modules/ROOT/examples/openai-vlm-full.json
create mode 100644 docs/modules/ROOT/examples/vlm-pdf-parsing.json
create mode 100644 docs/modules/ROOT/pages/advanced/local-vlm-server.adoc
create mode 100644 docs/modules/ROOT/pages/configuration/parsers/vlm-parsers.adoc
copy tika-parsers/tika-parsers-ml/{tika-parser-tess4j-module => tika-parser-vlm-ocr-module}/pom.xml (51%)
create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/main/java/org/apache/tika/parser/vlm/AbstractVLMParser.java
create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/main/java/org/apache/tika/parser/vlm/ClaudeVLMParser.java
create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/main/java/org/apache/tika/parser/vlm/GeminiVLMParser.java
create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/main/java/org/apache/tika/parser/vlm/MarkdownToXHTMLEmitter.java
create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/main/java/org/apache/tika/parser/vlm/OpenAIVLMParser.java
create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/main/java/org/apache/tika/parser/vlm/VLMOCRConfig.java
create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/test/java/org/apache/tika/parser/vlm/ClaudeVLMParserTest.java
create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/test/java/org/apache/tika/parser/vlm/GeminiVLMParserTest.java
create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/test/java/org/apache/tika/parser/vlm/MarkdownToXHTMLEmitterTest.java
create mode 100644 tika-parsers/tika-parsers-ml/tika-parser-vlm-ocr-module/src/test/java/org/apache/tika/parser/vlm/OpenAIVLMParserTest.java
delete mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/mutool/MuPDFRenderer.java
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/main/java/org/apache/tika/renderer/pdf/poppler/PopplerRenderer.java
create mode 100644 tika-parsers/tika-parsers-standard/tika-parsers-standard-modules/tika-parser-pdf-module/src/test/java/org/apache/tika/renderer/pdf/poppler/PopplerRendererTest.java
rename tika-parsers/tika-parsers-standard/tika-parsers-standard-package/src/test/resources/configs/{tika-rendering-mupdf-config.json => tika-rendering-poppler-config.json} (85%)