This is an automated email from the ASF dual-hosted git repository.

ndipiazza pushed a commit to branch TIKA-4703-docker-ci
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 041ea74b3d6c6a2975aa22e2b8915ba46af8497b
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Fri Mar 27 09:02:45 2026 -0500

    TIKA-4703: Add Docker CI pipelines for tika-server and tika-grpc
    
    Move Docker build infrastructure into the main tika repo so that
    Docker image releases are tied directly to Tika releases rather than
    requiring cross-repo coordination with tika-docker/tika-grpc-docker.
    
    Snapshot workflow (main branch push):
    - Builds tika-server minimal and full images from Maven output
    - Builds tika-grpc image from Maven output
    - Pushes snapshot tags to Docker Hub (e.g. 4.0.0-SNAPSHOT)
    
    Release workflow (version tag push):
    - Builds tika-server minimal/full from Apache mirror JARs with GPG
      verification (multi-arch: amd64, arm64, arm/v7, s390x)
    - Builds tika-grpc from Maven output (multi-arch: amd64, arm64)
    - Pushes versioned + latest tags to Docker Hub
    
    Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>
---
 .github/workflows/docker-release.yml               | 154 +++++++++++++++++++++
 .github/workflows/docker-snapshot.yml              | 147 ++++++++++++++++++++
 tika-grpc/docker-build/Dockerfile                  |  53 +++++++
 tika-grpc/docker-build/docker-build.sh             | 118 ++++++++++++++++
 tika-grpc/docker-build/start-tika-grpc.sh          |  42 ++++++
 tika-server/docker-build/docker-tool.sh            | 131 ++++++++++++++++++
 tika-server/docker-build/full/Dockerfile           |  82 +++++++++++
 tika-server/docker-build/full/Dockerfile.snapshot  |  52 +++++++
 tika-server/docker-build/minimal/Dockerfile        |  70 ++++++++++
 .../docker-build/minimal/Dockerfile.snapshot       |  34 +++++
 .../tika/parser/ocr/TesseractOCRConfig.properties  |  25 ++++
 .../customocr/tika-config-inline.xml               |  31 +++++
 .../customocr/tika-config-rendered.xml             |  38 +++++
 .../tika/parser/journal/GrobidExtractor.properties |  16 +++
 .../sample-configs/grobid/tika-config.xml          |  24 ++++
 .../sample-configs/ner/run_tika_server.sh          |  62 +++++++++
 .../sample-configs/ner/tika-config.xml             |  28 ++++
 .../vision/inception-rest-caption.xml              |  32 +++++
 .../sample-configs/vision/inception-rest-video.xml |  32 +++++
 .../sample-configs/vision/inception-rest.xml       |  32 +++++
 20 files changed, 1203 insertions(+)

diff --git a/.github/workflows/docker-release.yml 
b/.github/workflows/docker-release.yml
new file mode 100644
index 0000000000..a412c2a061
--- /dev/null
+++ b/.github/workflows/docker-release.yml
@@ -0,0 +1,154 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+name: Docker release - tika-server and tika-grpc
+
+# Run only when a tag matching the version-like filter below is pushed.
+# NOTE(review): the trailing `*` presumably also admits suffixed tags (for
+# example release candidates); both jobs below push `latest` for every
+# matching tag -- confirm this against the project's tagging scheme.
+on:
+  push:
+    tags:
+      - '[0-9]+.[0-9]+.[0-9]+*'
+
+jobs:
+  # Builds the minimal and full tika-server images for the pushed tag and
+  # pushes them to Docker Hub as apache/tika.
+  release-tika-server:
+    runs-on: ubuntu-latest
+    timeout-minutes: 60
+
+    steps:
+      - uses: actions/checkout@v4
+
+      # Strip the "refs/tags/" prefix from GITHUB_REF and publish the bare tag
+      # name as step output `tag`; later steps use it both as the TIKA_VERSION
+      # build argument and as the image tag.
+      - name: Extract version from tag
+        id: version
+        run: |
+          TAG_NAME="${GITHUB_REF#refs/tags/}"
+          echo "tag=${TAG_NAME}" >> "$GITHUB_OUTPUT"
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Set up QEMU for multi-arch
+        uses: docker/setup-qemu-action@v3
+
+      # Credentials are taken from repository secrets.
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      # Minimal image: pushed under the version tag and as `latest`.
+      - name: Build and push tika-server minimal
+        uses: docker/build-push-action@v6
+        with:
+          file: tika-server/docker-build/minimal/Dockerfile
+          platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/s390x
+          push: true
+          build-args: |
+            TIKA_VERSION=${{ steps.version.outputs.tag }}
+          tags: |
+            apache/tika:${{ steps.version.outputs.tag }}
+            apache/tika:latest
+
+      # Full image: same platforms; tags carry a `-full` suffix.
+      - name: Build and push tika-server full
+        uses: docker/build-push-action@v6
+        with:
+          file: tika-server/docker-build/full/Dockerfile
+          platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/s390x
+          push: true
+          build-args: |
+            TIKA_VERSION=${{ steps.version.outputs.tag }}
+          tags: |
+            apache/tika:${{ steps.version.outputs.tag }}-full
+            apache/tika:latest-full
+
+  # Builds the project with Maven, assembles a Docker build context under
+  # target/tika-grpc-docker and pushes a multi-arch apache/tika-grpc image
+  # tagged with the pushed git tag and `latest`.
+  release-tika-grpc:
+    runs-on: ubuntu-latest
+    timeout-minutes: 120
+
+    steps:
+      - uses: actions/checkout@v4
+
+      # Strip the "refs/tags/" prefix from GITHUB_REF and publish the bare tag
+      # name as step output `tag`.
+      - name: Extract version from tag
+        id: version
+        run: |
+          TAG_NAME="${GITHUB_REF#refs/tags/}"
+          echo "tag=${TAG_NAME}" >> "$GITHUB_OUTPUT"
+
+      - name: Set up JDK 17
+        uses: actions/setup-java@v4
+        with:
+          distribution: 'temurin'
+          java-version: '17'
+          cache: 'maven'
+
+      - name: Build with Maven (skip tests)
+        run: mvn clean install -DskipTests -B "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Set up QEMU for multi-arch
+        uses: docker/setup-qemu-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      # Collect the tika-grpc JAR, plugin zips, parser-package JARs, the start
+      # script and the Dockerfile into one directory used as the build context.
+      # The tag name reaches the script through the environment rather than
+      # being interpolated into the script text.
+      - name: Prepare tika-grpc Docker build context
+        env:
+          TIKA_VERSION: ${{ steps.version.outputs.tag }}
+        run: |
+          OUT_DIR=target/tika-grpc-docker
+
+          mkdir -p "${OUT_DIR}/libs" "${OUT_DIR}/plugins" "${OUT_DIR}/config" "${OUT_DIR}/bin"
+
+          cp "tika-grpc/target/tika-grpc-${TIKA_VERSION}.jar" "${OUT_DIR}/libs/"
+
+          # Copy tika-pipes plugin zip files; a missing zip is reported as a
+          # workflow warning instead of being skipped silently.
+          for dir in tika-pipes/tika-pipes-plugins/*/; do
+            plugin_name=$(basename "$dir")
+            zip_file="${dir}target/${plugin_name}-${TIKA_VERSION}.zip"
+            if [ -f "$zip_file" ]; then
+              cp "$zip_file" "${OUT_DIR}/plugins/"
+            else
+              echo "::warning::Plugin file ${zip_file} does not exist, skipping."
+            fi
+          done
+
+          # Copy parser packages; missing JARs are reported the same way.
+          for parser_package in \
+            "tika-parsers/tika-parsers-standard/tika-parsers-standard-package" \
+            "tika-parsers/tika-parsers-extended/tika-parser-scientific-package" \
+            "tika-parsers/tika-parsers-extended/tika-parser-sqlite3-package" \
+            "tika-parsers/tika-parsers-ml/tika-parser-nlp-package"; do
+            package_name=$(basename "$parser_package")
+            jar_file="${parser_package}/target/${package_name}-${TIKA_VERSION}.jar"
+            if [ -f "$jar_file" ]; then
+              cp "$jar_file" "${OUT_DIR}/plugins/"
+            else
+              echo "::warning::Parser package file ${jar_file} does not exist, skipping."
+            fi
+          done
+
+          cp "tika-grpc/docker-build/start-tika-grpc.sh" "${OUT_DIR}/bin/"
+          cp "tika-grpc/docker-build/Dockerfile" "${OUT_DIR}/Dockerfile"
+
+      - name: Build and push tika-grpc
+        uses: docker/build-push-action@v6
+        with:
+          context: target/tika-grpc-docker
+          platforms: linux/amd64,linux/arm64
+          push: true
+          build-args: |
+            VERSION=${{ steps.version.outputs.tag }}
+          tags: |
+            apache/tika-grpc:${{ steps.version.outputs.tag }}
+            apache/tika-grpc:latest
diff --git a/.github/workflows/docker-snapshot.yml 
b/.github/workflows/docker-snapshot.yml
new file mode 100644
index 0000000000..315cba2da4
--- /dev/null
+++ b/.github/workflows/docker-snapshot.yml
@@ -0,0 +1,147 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+name: Docker snapshot - tika-server and tika-grpc
+
+# Run on every push to main, except pushes that only touch the ignored paths.
+# NOTE(review): '*.md' presumably matches Markdown files at the repository
+# root only; if Markdown-only changes in subdirectories should be skipped as
+# well, '**.md' may be what is intended -- confirm against GitHub's
+# filter-pattern rules.
+on:
+  push:
+    branches: [ main ]
+    paths-ignore:
+      - 'docs/**'
+      - '*.md'
+
+jobs:
+  # Builds the project with Maven once, then builds and pushes three snapshot
+  # images from the Maven output: tika-server minimal, tika-server full and
+  # tika-grpc. Image tags are the Maven project version read from the pom.
+  build:
+    runs-on: ubuntu-latest
+    timeout-minutes: 120
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up JDK 17
+        uses: actions/setup-java@v4
+        with:
+          distribution: 'temurin'
+          java-version: '17'
+          cache: 'maven'
+
+      # Publish the Maven project version as step output `tika_version`.
+      - name: Extract version from pom
+        id: version
+        run: |
+          TIKA_VERSION=$(mvn help:evaluate -Dexpression=project.version -q -DforceStdout)
+          echo "tika_version=${TIKA_VERSION}" >> "$GITHUB_OUTPUT"
+
+      - name: Build with Maven (skip tests)
+        run: mvn clean install -DskipTests -B "-Dorg.slf4j.simpleLogger.log.org.apache.maven.cli.transfer.Slf4jMavenTransferListener=warn"
+
+      - name: Set up Docker Buildx
+        uses: docker/setup-buildx-action@v3
+
+      - name: Set up QEMU for multi-arch
+        uses: docker/setup-qemu-action@v3
+
+      - name: Login to Docker Hub
+        uses: docker/login-action@v3
+        with:
+          username: ${{ secrets.DOCKERHUB_USERNAME }}
+          password: ${{ secrets.DOCKERHUB_TOKEN }}
+
+      # --- tika-server (minimal) ---
+      # The build context holds only the server JAR and the snapshot Dockerfile.
+      - name: Prepare tika-server minimal build context
+        run: |
+          TIKA_VERSION="${{ steps.version.outputs.tika_version }}"
+          OUT_DIR=target/tika-server-minimal-docker
+          mkdir -p "${OUT_DIR}"
+          cp "tika-server/tika-server-standard/target/tika-server-standard-${TIKA_VERSION}.jar" "${OUT_DIR}/"
+          cp "tika-server/docker-build/minimal/Dockerfile.snapshot" "${OUT_DIR}/Dockerfile"
+
+      - name: Build and push tika-server minimal snapshot
+        uses: docker/build-push-action@v6
+        with:
+          context: target/tika-server-minimal-docker
+          platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/s390x
+          push: true
+          build-args: |
+            TIKA_VERSION=${{ steps.version.outputs.tika_version }}
+          tags: |
+            apache/tika:${{ steps.version.outputs.tika_version }}
+
+      # --- tika-server (full) ---
+      - name: Prepare tika-server full build context
+        run: |
+          TIKA_VERSION="${{ steps.version.outputs.tika_version }}"
+          OUT_DIR=target/tika-server-full-docker
+          mkdir -p "${OUT_DIR}"
+          cp "tika-server/tika-server-standard/target/tika-server-standard-${TIKA_VERSION}.jar" "${OUT_DIR}/"
+          cp "tika-server/docker-build/full/Dockerfile.snapshot" "${OUT_DIR}/Dockerfile"
+
+      - name: Build and push tika-server full snapshot
+        uses: docker/build-push-action@v6
+        with:
+          context: target/tika-server-full-docker
+          platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/s390x
+          push: true
+          build-args: |
+            TIKA_VERSION=${{ steps.version.outputs.tika_version }}
+          tags: |
+            apache/tika:${{ steps.version.outputs.tika_version }}-full
+
+      # --- tika-grpc ---
+      # Collect the tika-grpc JAR, plugin zips, parser-package JARs, the start
+      # script and the Dockerfile into one directory used as the build context.
+      - name: Prepare tika-grpc Docker build context
+        run: |
+          TIKA_VERSION="${{ steps.version.outputs.tika_version }}"
+          OUT_DIR=target/tika-grpc-docker
+
+          mkdir -p "${OUT_DIR}/libs" "${OUT_DIR}/plugins" "${OUT_DIR}/config" "${OUT_DIR}/bin"
+
+          cp "tika-grpc/target/tika-grpc-${TIKA_VERSION}.jar" "${OUT_DIR}/libs/"
+
+          # Copy tika-pipes plugin zip files; a missing zip is reported as a
+          # workflow warning instead of being skipped silently.
+          for dir in tika-pipes/tika-pipes-plugins/*/; do
+            plugin_name=$(basename "$dir")
+            zip_file="${dir}target/${plugin_name}-${TIKA_VERSION}.zip"
+            if [ -f "$zip_file" ]; then
+              cp "$zip_file" "${OUT_DIR}/plugins/"
+            else
+              echo "::warning::Plugin file ${zip_file} does not exist, skipping."
+            fi
+          done
+
+          # Copy parser packages; missing JARs are reported the same way.
+          for parser_package in \
+            "tika-parsers/tika-parsers-standard/tika-parsers-standard-package" \
+            "tika-parsers/tika-parsers-extended/tika-parser-scientific-package" \
+            "tika-parsers/tika-parsers-extended/tika-parser-sqlite3-package" \
+            "tika-parsers/tika-parsers-ml/tika-parser-nlp-package"; do
+            package_name=$(basename "$parser_package")
+            jar_file="${parser_package}/target/${package_name}-${TIKA_VERSION}.jar"
+            if [ -f "$jar_file" ]; then
+              cp "$jar_file" "${OUT_DIR}/plugins/"
+            else
+              echo "::warning::Parser package file ${jar_file} does not exist, skipping."
+            fi
+          done
+
+          cp "tika-grpc/docker-build/start-tika-grpc.sh" "${OUT_DIR}/bin/"
+          cp "tika-grpc/docker-build/Dockerfile" "${OUT_DIR}/Dockerfile"
+
+      - name: Build and push tika-grpc snapshot
+        uses: docker/build-push-action@v6
+        with:
+          context: target/tika-grpc-docker
+          platforms: linux/amd64,linux/arm64
+          push: true
+          build-args: |
+            VERSION=${{ steps.version.outputs.tika_version }}
+          tags: |
+            apache/tika-grpc:${{ steps.version.outputs.tika_version }}
diff --git a/tika-grpc/docker-build/Dockerfile 
b/tika-grpc/docker-build/Dockerfile
new file mode 100644
index 0000000000..ccb77c088d
--- /dev/null
+++ b/tika-grpc/docker-build/Dockerfile
@@ -0,0 +1,53 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+# Runtime image for tika-grpc. The build context must provide libs/, plugins/,
+# config/ and bin/ directories; bin/ must contain start-tika-grpc.sh.
+# NOTE(review): plucky is presumably an interim (non-LTS) Ubuntu release --
+# confirm its package archive is still served, otherwise `apt-get update` fails.
+FROM ubuntu:plucky
+ARG JRE='openjdk-17-jre-headless'
+# Install OS packages before any per-build ARG or COPY so that this expensive
+# layer stays cacheable when only the version or the JARs change.
+RUN set -eux \
+    && apt-get update \
+    && apt-get install --yes --no-install-recommends gnupg2 software-properties-common \
+    && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends $JRE \
+        gdal-bin \
+        tesseract-ocr \
+        tesseract-ocr-eng \
+        tesseract-ocr-ita \
+        tesseract-ocr-fra \
+        tesseract-ocr-spa \
+        tesseract-ocr-deu \
+    && echo ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true | debconf-set-selections \
+    && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
+        xfonts-utils \
+        fonts-freefont-ttf \
+        fonts-liberation \
+        ttf-mscorefonts-installer \
+        wget \
+        cabextract \
+    && apt-get clean -y \
+    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+
+# Per-build settings; each one is exported below so the start script can read it.
+ARG VERSION
+ARG TIKA_GRPC_MAX_INBOUND_MESSAGE_SIZE=104857600
+ARG TIKA_GRPC_MAX_OUTBOUND_MESSAGE_SIZE=104857600
+ARG TIKA_GRPC_NUM_THREADS=4
+
+COPY libs/ /tika/libs/
+COPY plugins/ /tika/plugins/
+COPY config/ /tika/config/
+COPY bin/ /tika/bin
+RUN chmod +x "/tika/bin/start-tika-grpc.sh"
+
+EXPOSE 9090
+ENV TIKA_VERSION=$VERSION
+ENV TIKA_GRPC_MAX_INBOUND_MESSAGE_SIZE=$TIKA_GRPC_MAX_INBOUND_MESSAGE_SIZE
+ENV TIKA_GRPC_MAX_OUTBOUND_MESSAGE_SIZE=$TIKA_GRPC_MAX_OUTBOUND_MESSAGE_SIZE
+ENV TIKA_GRPC_NUM_THREADS=$TIKA_GRPC_NUM_THREADS
+ENTRYPOINT ["/tika/bin/start-tika-grpc.sh"]
+
+LABEL maintainer="Apache Tika Developers [email protected]"
diff --git a/tika-grpc/docker-build/docker-build.sh 
b/tika-grpc/docker-build/docker-build.sh
new file mode 100755
index 0000000000..c522ec04fa
--- /dev/null
+++ b/tika-grpc/docker-build/docker-build.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing,
+#   software distributed under the License is distributed on an
+#   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#   KIND, either express or implied.  See the License for the
+#   specific language governing permissions and limitations
+#   under the License.
+
+# This script assembles the Docker build context for tika-grpc and builds the 
image.
+# It is intended to be run from the root of the tika repository after a Maven 
build.
+
+set -euo pipefail
+
+if [ -z "${TIKA_VERSION:-}" ]; then
+    echo "Environment variable TIKA_VERSION is required, and should match the 
maven project version of Tika"
+    exit 1
+fi
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+REPO_ROOT="${SCRIPT_DIR}/../../"
+
+cd "${REPO_ROOT}" || exit
+
+OUT_DIR=target/tika-grpc-docker
+
+MULTI_ARCH=${MULTI_ARCH:-false}
+DOCKER_ID=${DOCKER_ID:-}
+PROJECT_NAME=${PROJECT_NAME:-tika-grpc}
+
+# If RELEASE_IMAGE_TAG not specified, use TIKA_VERSION
+if [[ -z "${RELEASE_IMAGE_TAG:-}" ]]; then
+    RELEASE_IMAGE_TAG="${TIKA_VERSION}"
+    # Remove '-SNAPSHOT' from the version string
+    RELEASE_IMAGE_TAG="${RELEASE_IMAGE_TAG//-SNAPSHOT/}"
+fi
+
+mkdir -p "${OUT_DIR}/libs"
+mkdir -p "${OUT_DIR}/plugins"
+mkdir -p "${OUT_DIR}/config"
+mkdir -p "${OUT_DIR}/bin"
+cp -v -r "tika-grpc/target/tika-grpc-${TIKA_VERSION}.jar" "${OUT_DIR}/libs"
+
+# Copy all tika-pipes plugin zip files
+for dir in tika-pipes/tika-pipes-plugins/*/; do
+    plugin_name=$(basename "$dir")
+    zip_file="${dir}target/${plugin_name}-${TIKA_VERSION}.zip"
+    if [ -f "$zip_file" ]; then
+        cp -v -r "$zip_file" "${OUT_DIR}/plugins"
+    else
+        echo "WARNING: Plugin file $zip_file does not exist, skipping."
+    fi
+done
+
+# Copy parser package jars as plugins
+parser_packages=(
+    "tika-parsers/tika-parsers-standard/tika-parsers-standard-package"
+    "tika-parsers/tika-parsers-extended/tika-parser-scientific-package"
+    "tika-parsers/tika-parsers-extended/tika-parser-sqlite3-package"
+    "tika-parsers/tika-parsers-ml/tika-parser-nlp-package"
+)
+
+for parser_package in "${parser_packages[@]}"; do
+    package_name=$(basename "$parser_package")
+    jar_file="${parser_package}/target/${package_name}-${TIKA_VERSION}.jar"
+    if [ -f "$jar_file" ]; then
+        cp -v -r "$jar_file" "${OUT_DIR}/plugins"
+    else
+        echo "Parser package file $jar_file does not exist, skipping."
+    fi
+done
+
+cp -v -r "tika-grpc/docker-build/start-tika-grpc.sh" "${OUT_DIR}/bin"
+cp -v "tika-grpc/docker-build/Dockerfile" "${OUT_DIR}/Dockerfile"
+
+cd "${OUT_DIR}" || exit
+
+echo "Running docker build from directory: $(pwd)"
+
+IMAGE_TAGS=()
+if [[ -n "${DOCKER_ID}" ]]; then
+    IMAGE_TAGS+=("-t" "${DOCKER_ID}/${PROJECT_NAME}:${RELEASE_IMAGE_TAG}")
+fi
+
+if [ ${#IMAGE_TAGS[@]} -eq 0 ]; then
+    echo "No image tags specified. Set DOCKER_ID environment variable to 
enable Docker build."
+    exit 0
+fi
+
+if [ "${MULTI_ARCH}" == "true" ]; then
+    echo "Building multi-arch image"
+    docker buildx create --name tikabuilder --use || true
+    docker buildx build \
+        --builder=tikabuilder . \
+        "${IMAGE_TAGS[@]}" \
+        --build-arg VERSION="${TIKA_VERSION}" \
+        --platform linux/amd64,linux/arm64 \
+        --push
+    docker buildx stop tikabuilder
+    docker buildx rm tikabuilder
+else
+    echo "Building single-arch image"
+    docker build . "${IMAGE_TAGS[@]}" --build-arg VERSION="${TIKA_VERSION}"
+fi
+
+echo 
"==================================================================================================="
+echo "Done running docker build with tags: ${IMAGE_TAGS[*]}"
+echo 
"==================================================================================================="
diff --git a/tika-grpc/docker-build/start-tika-grpc.sh 
b/tika-grpc/docker-build/start-tika-grpc.sh
new file mode 100755
index 0000000000..c42c953d7b
--- /dev/null
+++ b/tika-grpc/docker-build/start-tika-grpc.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+echo "Tika Version:"
+echo "${TIKA_VERSION}"
+echo "Tika Plugins:"
+ls "/tika/plugins"
+echo "Tika gRPC Max Inbound Message Size:"
+echo "${TIKA_GRPC_MAX_INBOUND_MESSAGE_SIZE}"
+echo "Tika gRPC Max Outbound Message Size:"
+echo "${TIKA_GRPC_MAX_OUTBOUND_MESSAGE_SIZE}"
+echo "Tika gRPC Num Threads:"
+echo "${TIKA_GRPC_NUM_THREADS}"
+exec java \
+  -Dgrpc.server.port=9090 \
+  
"-Dgrpc.server.max-inbound-message-size=${TIKA_GRPC_MAX_INBOUND_MESSAGE_SIZE}" \
+  
"-Dgrpc.server.max-outbound-message-size=${TIKA_GRPC_MAX_OUTBOUND_MESSAGE_SIZE}"
 \
+  "-Dgrpc.server.numThreads=${TIKA_GRPC_NUM_THREADS}" \
+  --add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED \
+  --add-opens=java.base/jdk.internal.misc=ALL-UNNAMED \
+  --add-opens=java.base/sun.nio.ch=ALL-UNNAMED \
+  --add-opens=java.management/com.sun.jmx.mbeanserver=ALL-UNNAMED \
+  --add-opens=jdk.internal.jvmstat/sun.jvmstat.monitor=ALL-UNNAMED \
+  --add-opens=java.base/sun.reflect.generics.reflectiveObjects=ALL-UNNAMED \
+  --add-opens=java.base/java.io=ALL-UNNAMED \
+  --add-opens=java.base/java.nio=ALL-UNNAMED \
+  --add-opens=java.base/java.util=ALL-UNNAMED \
+  --add-opens=java.base/java.lang=ALL-UNNAMED \
+  -Djava.net.preferIPv4Stack=true \
+  "-Dplugins.pluginDirs=/tika/plugins" \
+  -jar "/tika/libs/tika-grpc-${TIKA_VERSION}.jar"
diff --git a/tika-server/docker-build/docker-tool.sh 
b/tika-server/docker-build/docker-tool.sh
new file mode 100755
index 0000000000..2a82b5fa34
--- /dev/null
+++ b/tika-server/docker-build/docker-tool.sh
@@ -0,0 +1,131 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#   Unless required by applicable law or agreed to in writing,
+#   software distributed under the License is distributed on an
+#   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+#   KIND, either express or implied.  See the License for the
+#   specific language governing permissions and limitations
+#   under the License.
+
+# Helper for building, smoke-testing and publishing the apache/tika images.
+# The build and publish subcommands reference minimal/ and full/ by relative
+# path, so run the script from the directory that contains them.
+image_name=apache/tika
+
+# Print the given message on stderr and exit with status 1.
+die() {
+  echo "$*" >&2
+  exit 1
+}
+
+# Remove the buildx builder created by `publish`, then fail with the message.
+stop_and_die() {
+  docker buildx rm tika-builder || die "couldn't stop builder -- make sure to stop the builder manually! "
+  die "$*"
+}
+
+# Print the command-line help.
+usage() {
+  echo "Usage:"
+  echo "    docker-tool.sh -h                      Display this help message."
+  echo "    docker-tool.sh build <TIKA_DOCKER_VERSION> <TIKA_VERSION>   Builds <TIKA_DOCKER_VERSION> images for <TIKA_VERSION>."
+  echo "    docker-tool.sh test <TIKA_DOCKER_VERSION>     Tests images for <TIKA_DOCKER_VERSION>."
+  echo "    docker-tool.sh publish <TIKA_DOCKER_VERSION> <TIKA_VERSION> Builds multi-arch images for <TIKA_DOCKER_VERSION> and pushes to Docker Hub."
+}
+
+while getopts ":h" opt; do
+  case ${opt} in
+    h )
+      usage
+      exit 0
+      ;;
+   \? )
+     echo "Invalid Option: -$OPTARG" 1>&2
+     exit 1
+     ;;
+  esac
+done
+
+# Kill and remove the named test container.
+stop_test_container() {
+  local container_name=$1
+  docker kill "$container_name"
+  docker rm "$container_name"
+}
+
+# Start image <image_name>:$1 in a container named $1 and check that
+#   - the server answers HTTP 200 on port 9998,
+#   - the container's configured user is 35002:35002,
+#   - when $2 is "true", ImageMagick's convert runs inside the container.
+# The container is removed afterwards; any failed check exits with status 1.
+test_docker_image() {
+  local container_name=$1
+  local image=$image_name:$1
+  local full=$2
+  local url=http://localhost:9998/
+  local status user
+
+  docker run -d --name "$container_name" -p 127.0.0.1:9998:9998 "$image"
+  # Give the server time to start before probing it.
+  sleep 10
+  status=$(curl --head --location --connect-timeout 5 --write-out '%{http_code}' --silent --output /dev/null "$url")
+  user=$(docker inspect "$container_name" --format '{{.Config.User}}')
+
+  if [[ $status == '200' ]]
+  then
+    echo "$(tput setaf 2)Image: $image - Basic test passed$(tput sgr0)"
+  else
+    echo "$(tput setaf 1)Image: $image - Basic test failed$(tput sgr0)"
+    stop_test_container "$container_name"
+    exit 1
+  fi
+
+  #now test that the user is correctly set
+  if [[ $user == '35002:35002' ]]
+  then
+    echo "$(tput setaf 2)Image: $image - User passed$(tput sgr0)"
+  else
+    echo "$(tput setaf 1)Image: $image - User failed$(tput sgr0)"
+    stop_test_container "$container_name"
+    exit 1
+  fi
+
+  # [[ ]] keeps the comparison well-formed even when $2 was not supplied.
+  if [[ $full == true ]]
+  then
+    # Test ImageMagick is installed and runnable
+    if docker exec "$container_name" /usr/bin/convert -version >/dev/null
+    then
+      echo "$(tput setaf 2)Image: $image - ImageMagick passed$(tput sgr0)"
+    else
+      echo "$(tput setaf 1)Image: $image - ImageMagick failed$(tput sgr0)"
+      stop_test_container "$container_name"
+      exit 1
+    fi
+  fi
+
+  stop_test_container "$container_name"
+}
+
+shift $((OPTIND -1))
+subcommand=${1:-}
+tika_docker_version=${2:-}
+tika_version=${3:-}
+
+platforms=linux/arm/v7,linux/arm64/v8,linux/amd64,linux/s390x
+
+case "$subcommand" in
+  build)
+    [[ -n $tika_docker_version && -n $tika_version ]] || die "build requires <TIKA_DOCKER_VERSION> and <TIKA_VERSION>"
+    # Build slim tika- with minimal dependencies
+    docker build -t "${image_name}:${tika_docker_version}" --build-arg "TIKA_VERSION=${tika_version}" --no-cache - < minimal/Dockerfile || die "couldn't build minimal"
+    # Build full tika- with OCR, Fonts and GDAL
+    docker build -t "${image_name}:${tika_docker_version}-full" --build-arg "TIKA_VERSION=${tika_version}" --no-cache - < full/Dockerfile || die "couldn't build full"
+    ;;
+
+  test)
+    [[ -n $tika_docker_version ]] || die "test requires <TIKA_DOCKER_VERSION>"
+    # Test the images
+    test_docker_image "${tika_docker_version}" false
+    test_docker_image "${tika_docker_version}-full" true
+    ;;
+
+  publish)
+    [[ -n $tika_docker_version && -n $tika_version ]] || die "publish requires <TIKA_DOCKER_VERSION> and <TIKA_VERSION>"
+    docker buildx create --use --name tika-builder || die "couldn't create builder"
+    # Build multi-arch with buildx and push
+    docker buildx build --platform "$platforms" --output "type=image,push=true" \
+      --tag "${image_name}:latest" --tag "${image_name}:${tika_docker_version}" \
+      --build-arg "TIKA_VERSION=${tika_version}" --no-cache --builder tika-builder \
+      minimal || stop_and_die "couldn't build multi-arch minimal"
+    docker buildx build --platform "$platforms" --output "type=image,push=true" \
+      --tag "${image_name}:latest-full" --tag "${image_name}:${tika_docker_version}-full" \
+      --build-arg "TIKA_VERSION=${tika_version}" --no-cache --builder tika-builder \
+      full || stop_and_die "couldn't build multi-arch full"
+    docker buildx rm tika-builder || die "couldn't stop builder -- make sure to stop the builder manually! "
+    ;;
+
+  *)
+    # A missing or unknown subcommand used to fall through and exit 0 silently.
+    usage >&2
+    die "Unknown or missing subcommand: '${subcommand}'"
+    ;;
+
+esac
diff --git a/tika-server/docker-build/full/Dockerfile 
b/tika-server/docker-build/full/Dockerfile
new file mode 100644
index 0000000000..1b918390f6
--- /dev/null
+++ b/tika-server/docker-build/full/Dockerfile
@@ -0,0 +1,82 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+# "random" uid/gid hopefully not used anywhere else
+# This needs to be set globally and then referenced in
+# the subsequent stages -- see TIKA-3912
+ARG UID_GID="35002:35002"
+
+FROM ubuntu:plucky AS base
+
+# Stage fetch_tika: downloads tika-server-standard-${TIKA_VERSION}.jar and its
+# detached signature, then verifies the signature against the Tika KEYS file.
+FROM base AS fetch_tika
+
+ARG TIKA_VERSION
+# Only referenced by the commented-out RUN at the end of this stage; the
+# signature is currently always verified.
+ARG CHECK_SIG=true
+
+ENV NEAREST_TIKA_SERVER_URL="https://dlcdn.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.jar" \
+    ARCHIVE_TIKA_SERVER_URL="https://archive.apache.org/dist/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.jar" \
+    BACKUP_TIKA_SERVER_URL="https://downloads.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.jar" \
+    DEFAULT_TIKA_SERVER_ASC_URL="https://downloads.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.jar.asc" \
+    ARCHIVE_TIKA_SERVER_ASC_URL="https://archive.apache.org/dist/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.jar.asc" \
+    TIKA_VERSION=$TIKA_VERSION
+
+# Download order for the JAR: NEAREST, then ARCHIVE, then BACKUP; for the
+# signature: DEFAULT, then ARCHIVE. After a failed wget the partial file is
+# removed so the following `[ -f ... ]` test triggers the next fallback. The
+# `&&` / `||` list is evaluated strictly left to right -- keep the order intact.
+RUN DEBIAN_FRONTEND=noninteractive apt-get update && apt-get -y install gnupg2 wget ca-certificates \
+    && wget -t 10 --max-redirect 1 --retry-connrefused -qO- https://downloads.apache.org/tika/KEYS | gpg --import \
+    && wget -t 10 --max-redirect 1 --retry-connrefused $NEAREST_TIKA_SERVER_URL -O /tika-server-standard-${TIKA_VERSION}.jar || rm /tika-server-standard-${TIKA_VERSION}.jar \
+    && sh -c "[ -f /tika-server-standard-${TIKA_VERSION}.jar ]" || wget $ARCHIVE_TIKA_SERVER_URL -O /tika-server-standard-${TIKA_VERSION}.jar || rm /tika-server-standard-${TIKA_VERSION}.jar \
+    && sh -c "[ -f /tika-server-standard-${TIKA_VERSION}.jar ]" || wget $BACKUP_TIKA_SERVER_URL -O /tika-server-standard-${TIKA_VERSION}.jar || rm /tika-server-standard-${TIKA_VERSION}.jar \
+    && sh -c "[ -f /tika-server-standard-${TIKA_VERSION}.jar ]" || exit 1 \
+    && wget -t 10 --max-redirect 1 --retry-connrefused $DEFAULT_TIKA_SERVER_ASC_URL -O /tika-server-standard-${TIKA_VERSION}.jar.asc  || rm /tika-server-standard-${TIKA_VERSION}.jar.asc \
+    && sh -c "[ -f /tika-server-standard-${TIKA_VERSION}.jar.asc ]" || wget $ARCHIVE_TIKA_SERVER_ASC_URL -O /tika-server-standard-${TIKA_VERSION}.jar.asc || rm /tika-server-standard-${TIKA_VERSION}.jar.asc \
+    && sh -c "[ -f /tika-server-standard-${TIKA_VERSION}.jar.asc ]" || exit 1 \
+    && gpg --verify /tika-server-standard-${TIKA_VERSION}.jar.asc /tika-server-standard-${TIKA_VERSION}.jar
+
+#RUN if [ "$CHECK_SIG" = "true" ] ; then gpg --verify /tika-server-standard-${TIKA_VERSION}.jar.asc /tika-server-standard-${TIKA_VERSION}.jar; fi
+
+# Stage runtime: the final image. Installs the JRE, GDAL, ImageMagick,
+# Tesseract with several language packs, and fonts, then copies in the JAR
+# produced by the fetch_tika stage.
+FROM base AS runtime
+# Re-declared without a value so this stage can reference the UID_GID ARG.
+ARG UID_GID
+ARG JRE='openjdk-21-jre-headless'
+RUN set -eux \
+    && apt-get update \
+    && apt-get install --yes --no-install-recommends gnupg2 software-properties-common \
+    && apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends $JRE \
+        gdal-bin \
+        imagemagick \
+        tesseract-ocr \
+        tesseract-ocr-eng \
+        tesseract-ocr-ita \
+        tesseract-ocr-fra \
+        tesseract-ocr-spa \
+        tesseract-ocr-deu \
+        tesseract-ocr-jpn \
+    && echo ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula select true | debconf-set-selections \
+    && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends \
+        xfonts-utils \
+        fonts-freefont-ttf \
+        fonts-liberation \
+        ttf-mscorefonts-installer \
+        wget \
+        cabextract \
+    && apt-get clean -y \
+    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+# Declared after the package layer; exported so the ENTRYPOINT shell can
+# expand ${TIKA_VERSION} at container start.
+ARG TIKA_VERSION
+ENV TIKA_VERSION=$TIKA_VERSION
+
+COPY --from=fetch_tika /tika-server-standard-${TIKA_VERSION}.jar /tika-server-standard-${TIKA_VERSION}.jar
+# Run as the fixed unprivileged uid:gid.
+USER $UID_GID
+
+EXPOSE 9998
+# Runs TikaServerCli bound to 0.0.0.0 with /tika-extras/* on the classpath;
+# arguments given to the container are appended through $0 and $@.
+ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp \"/tika-server-standard-${TIKA_VERSION}.jar:/tika-extras/*\" org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $0 $@"]
+
+LABEL maintainer="Apache Tika Developers [email protected]"
+
diff --git a/tika-server/docker-build/full/Dockerfile.snapshot 
b/tika-server/docker-build/full/Dockerfile.snapshot
new file mode 100644
index 0000000000..8882dc5b90
--- /dev/null
+++ b/tika-server/docker-build/full/Dockerfile.snapshot
@@ -0,0 +1,52 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+# Snapshot variant: copies the JAR from the Maven build output rather than
+# downloading from Apache mirrors. Used for nightly/snapshot Docker builds.
+
+ARG UID_GID="35002:35002"
+
+FROM ubuntu:plucky AS runtime
+ARG UID_GID
+ARG TIKA_VERSION
+ARG JRE='openjdk-21-jre-headless'
+RUN set -eux \
+    && apt-get update \
+    && apt-get install --yes --no-install-recommends gnupg2 
software-properties-common \
+    && apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install --yes 
--no-install-recommends $JRE \
+        gdal-bin \
+        imagemagick \
+        tesseract-ocr \
+        tesseract-ocr-eng \
+        tesseract-ocr-ita \
+        tesseract-ocr-fra \
+        tesseract-ocr-spa \
+        tesseract-ocr-deu \
+        tesseract-ocr-jpn \
+    && echo ttf-mscorefonts-installer msttcorefonts/accepted-mscorefonts-eula 
select true | debconf-set-selections \
+    && DEBIAN_FRONTEND=noninteractive apt-get install --yes 
--no-install-recommends \
+        xfonts-utils \
+        fonts-freefont-ttf \
+        fonts-liberation \
+        ttf-mscorefonts-installer \
+        wget \
+        cabextract \
+    && apt-get clean -y \
+    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+ENV TIKA_VERSION=$TIKA_VERSION
+COPY tika-server-standard-${TIKA_VERSION}.jar 
/tika-server-standard-${TIKA_VERSION}.jar
+USER $UID_GID
+EXPOSE 9998
+ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp 
\"/tika-server-standard-${TIKA_VERSION}.jar:/tika-extras/*\" 
org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $0 $@"]
+
+LABEL maintainer="Apache Tika Developers [email protected]"
diff --git a/tika-server/docker-build/minimal/Dockerfile 
b/tika-server/docker-build/minimal/Dockerfile
new file mode 100644
index 0000000000..1c5195920a
--- /dev/null
+++ b/tika-server/docker-build/minimal/Dockerfile
@@ -0,0 +1,70 @@
+
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+# "random" uid/gid hopefully not used anywhere else
+# This needs to be set globally and then referenced in
+# the subsequent stages -- see TIKA-3912
+ARG UID_GID="35002:35002"
+
+FROM ubuntu:plucky AS base
+
+FROM base AS fetch_tika
+
+ARG TIKA_VERSION
+ARG CHECK_SIG=true
+
+ENV NEAREST_TIKA_SERVER_URL="https://dlcdn.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.jar" \
+    ARCHIVE_TIKA_SERVER_URL="https://archive.apache.org/dist/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.jar" \
+    BACKUP_TIKA_SERVER_URL="https://downloads.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.jar" \
+    DEFAULT_TIKA_SERVER_ASC_URL="https://downloads.apache.org/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.jar.asc" \
+    ARCHIVE_TIKA_SERVER_ASC_URL="https://archive.apache.org/dist/tika/${TIKA_VERSION}/tika-server-standard-${TIKA_VERSION}.jar.asc" \
+    TIKA_VERSION=$TIKA_VERSION
+
+RUN set -eux \
+    && apt-get update \
+    && DEBIAN_FRONTEND=noninteractive apt-get install --yes 
--no-install-recommends \
+        gnupg2 \
+        wget \
+        ca-certificates \
+    && wget -t 10 --max-redirect 1 --retry-connrefused -qO- 
https://downloads.apache.org/tika/KEYS | gpg --import \
+    && wget -t 10 --max-redirect 1 --retry-connrefused 
$NEAREST_TIKA_SERVER_URL -O /tika-server-standard-${TIKA_VERSION}.jar || rm 
/tika-server-standard-${TIKA_VERSION}.jar \
+    && sh -c "[ -f /tika-server-standard-${TIKA_VERSION}.jar ]" || wget 
$ARCHIVE_TIKA_SERVER_URL -O /tika-server-standard-${TIKA_VERSION}.jar || rm 
/tika-server-standard-${TIKA_VERSION}.jar \
+    && sh -c "[ -f /tika-server-standard-${TIKA_VERSION}.jar ]" || wget 
$BACKUP_TIKA_SERVER_URL -O /tika-server-standard-${TIKA_VERSION}.jar || rm 
/tika-server-standard-${TIKA_VERSION}.jar \
+    && sh -c "[ -f /tika-server-standard-${TIKA_VERSION}.jar ]" || exit 1 \
+    && wget -t 10 --max-redirect 1 --retry-connrefused 
$DEFAULT_TIKA_SERVER_ASC_URL -O /tika-server-standard-${TIKA_VERSION}.jar.asc  
|| rm /tika-server-standard-${TIKA_VERSION}.jar.asc \
+    && sh -c "[ -f /tika-server-standard-${TIKA_VERSION}.jar.asc ]" || wget 
$ARCHIVE_TIKA_SERVER_ASC_URL -O /tika-server-standard-${TIKA_VERSION}.jar.asc 
|| rm /tika-server-standard-${TIKA_VERSION}.jar.asc \
+    && sh -c "[ -f /tika-server-standard-${TIKA_VERSION}.jar.asc ]" || exit 1 \
+    && gpg --verify /tika-server-standard-${TIKA_VERSION}.jar.asc 
/tika-server-standard-${TIKA_VERSION}.jar
+
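+# The conditional signature check below used to work, but building 2.9.2.0
+# fails with "ERROR: failed to solve: failed to prepare $data as $data2: invalid argument".
+# The signature is verified unconditionally at the end of the RUN step above.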
+#RUN if [ "$CHECK_SIG" = "true" ] ; then gpg --verify 
/tika-server-standard-${TIKA_VERSION}.jar.asc 
/tika-server-standard-${TIKA_VERSION}.jar; fi
+
+FROM base AS runtime
+# must reference uid_gid
+ARG UID_GID
+ARG JRE='openjdk-21-jre-headless'
+RUN set -eux \
+    && apt-get update \
+    && apt-get install --yes --no-install-recommends \
+        ${JRE} \
+        ca-certificates \
+    && apt-get clean -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+ARG TIKA_VERSION
+ENV TIKA_VERSION=$TIKA_VERSION
+COPY --from=fetch_tika /tika-server-standard-${TIKA_VERSION}.jar 
/tika-server-standard-${TIKA_VERSION}.jar
+USER $UID_GID
+EXPOSE 9998
+ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp 
\"/tika-server-standard-${TIKA_VERSION}.jar:/tika-extras/*\" 
org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $0 $@"]
+
+LABEL maintainer="Apache Tika Developers [email protected]"
diff --git a/tika-server/docker-build/minimal/Dockerfile.snapshot 
b/tika-server/docker-build/minimal/Dockerfile.snapshot
new file mode 100644
index 0000000000..ac6644f345
--- /dev/null
+++ b/tika-server/docker-build/minimal/Dockerfile.snapshot
@@ -0,0 +1,34 @@
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not
+# use this file except in compliance with the License. You may obtain a copy of
+# the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+# License for the specific language governing permissions and limitations under
+# the License.
+
+# Snapshot variant: copies the JAR from the Maven build output rather than
+# downloading from Apache mirrors. Used for nightly/snapshot Docker builds.
+
+ARG UID_GID="35002:35002"
+
+FROM ubuntu:plucky AS runtime
+ARG UID_GID
+ARG TIKA_VERSION
+ARG JRE='openjdk-21-jre-headless'
+RUN set -eux \
+    && apt-get update \
+    && apt-get install --yes --no-install-recommends \
+        ${JRE} \
+        ca-certificates \
+    && apt-get clean -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
+ENV TIKA_VERSION=$TIKA_VERSION
+COPY tika-server-standard-${TIKA_VERSION}.jar 
/tika-server-standard-${TIKA_VERSION}.jar
+USER $UID_GID
+EXPOSE 9998
+ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp 
\"/tika-server-standard-${TIKA_VERSION}.jar:/tika-extras/*\" 
org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $0 $@"]
+
+LABEL maintainer="Apache Tika Developers [email protected]"
diff --git 
a/tika-server/docker-build/sample-configs/customocr/org/apache/tika/parser/ocr/TesseractOCRConfig.properties
 
b/tika-server/docker-build/sample-configs/customocr/org/apache/tika/parser/ocr/TesseractOCRConfig.properties
new file mode 100644
index 0000000000..b4b787ffc6
--- /dev/null
+++ 
b/tika-server/docker-build/sample-configs/customocr/org/apache/tika/parser/ocr/TesseractOCRConfig.properties
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Customise or add the settings you want here
+language=eng+spa+fra+deu+ita
+timeout=240
+minFileSizeToOcr=1
+enableImageProcessing=0
+density=200
+depth=8
+filter=box
+resize=300
+applyRotation=true
\ No newline at end of file
diff --git 
a/tika-server/docker-build/sample-configs/customocr/tika-config-inline.xml 
b/tika-server/docker-build/sample-configs/customocr/tika-config-inline.xml
new file mode 100644
index 0000000000..1c9b613033
--- /dev/null
+++ b/tika-server/docker-build/sample-configs/customocr/tika-config-inline.xml
@@ -0,0 +1,31 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<properties>
+  <parsers>     
+        <!-- Load TesseractOCRParser (could use DefaultParser if you want 
others too) -->
+        <parser class="org.apache.tika.parser.ocr.TesseractOCRParser"/>   
+
+        <!-- Extract and OCR Inline Images in PDF -->
+        <parser class="org.apache.tika.parser.pdf.PDFParser">
+            <params>
+                <param name="extractInlineImages" type="bool">true</param>
+            </params>
+        </parser>
+        
+  </parsers>
+</properties>
diff --git 
a/tika-server/docker-build/sample-configs/customocr/tika-config-rendered.xml 
b/tika-server/docker-build/sample-configs/customocr/tika-config-rendered.xml
new file mode 100644
index 0000000000..bcd8666996
--- /dev/null
+++ b/tika-server/docker-build/sample-configs/customocr/tika-config-rendered.xml
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<properties>
+  <parsers>     
+        <!-- Load TesseractOCRParser (could use DefaultParser if you want 
others too) -->
+        <parser class="org.apache.tika.parser.ocr.TesseractOCRParser"/>   
+
+        <!-- OCR on Rendered Pages -->
+        <parser class="org.apache.tika.parser.pdf.PDFParser">
+            <params>
+                <!-- no_ocr - extract text only
+                     ocr_only - don't extract text and just attempt OCR
+                     ocr_and_text - extract text and attempt OCR (from Tika 
1.24)
+                     auto - extract text but if < 10 characters try OCR
+                -->
+                <param name="ocrStrategy" type="string">ocr_only</param>
+                <param name="ocrImageType" type="string">rgb</param>
+                <param name="ocrDPI" type="int">100</param>
+            </params>
+        </parser>
+
+  </parsers>
+</properties>
diff --git 
a/tika-server/docker-build/sample-configs/grobid/org/apache/tika/parser/journal/GrobidExtractor.properties
 
b/tika-server/docker-build/sample-configs/grobid/org/apache/tika/parser/journal/GrobidExtractor.properties
new file mode 100644
index 0000000000..44689a2bb3
--- /dev/null
+++ 
b/tika-server/docker-build/sample-configs/grobid/org/apache/tika/parser/journal/GrobidExtractor.properties
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+grobid.server.url=http://grobid:8070
\ No newline at end of file
diff --git a/tika-server/docker-build/sample-configs/grobid/tika-config.xml 
b/tika-server/docker-build/sample-configs/grobid/tika-config.xml
new file mode 100644
index 0000000000..5b4aad9c72
--- /dev/null
+++ b/tika-server/docker-build/sample-configs/grobid/tika-config.xml
@@ -0,0 +1,24 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<properties>
+  <parsers>
+    <parser class="org.apache.tika.parser.journal.JournalParser">
+      <mime>application/pdf</mime>
+    </parser>
+  </parsers>
+</properties>
diff --git a/tika-server/docker-build/sample-configs/ner/run_tika_server.sh 
b/tika-server/docker-build/sample-configs/ner/run_tika_server.sh
new file mode 100755
index 0000000000..fb447be4cf
--- /dev/null
+++ b/tika-server/docker-build/sample-configs/ner/run_tika_server.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+#############################################################################
+# See https://cwiki.apache.org/confluence/display/TIKA/TikaAndNER for details
+# on how to configure additional NER libraries
+#############################################################################
+
+# ------------------------------------
+# Download OpenNLP Models to classpath
+# ------------------------------------
+
+OPENNLP_LOCATION="/ner/org/apache/tika/parser/ner/opennlp"
+URL="http://opennlp.sourceforge.net/models-1.5";
+
+mkdir -p $OPENNLP_LOCATION
+if [ "$(ls -A $OPENNLP_LOCATION/*.bin)" ]; then
+    echo "OpenNLP models directory has files, so skipping fetch";
+else
+       echo "No OpenNLP models found, so fetching them"
+       wget "$URL/en-ner-person.bin" -O $OPENNLP_LOCATION/ner-person.bin
+       wget "$URL/en-ner-location.bin" -O $OPENNLP_LOCATION/ner-location.bin
+       wget "$URL/en-ner-organization.bin" -O 
$OPENNLP_LOCATION/ner-organization.bin;
+       wget "$URL/en-ner-date.bin" -O $OPENNLP_LOCATION/ner-date.bin
+       wget "$URL/en-ner-time.bin" -O $OPENNLP_LOCATION/ner-time.bin
+       wget "$URL/en-ner-percentage.bin" -O 
$OPENNLP_LOCATION/ner-percentage.bin
+       wget "$URL/en-ner-money.bin" -O $OPENNLP_LOCATION/ner-money.bin
+fi
+
+# --------------------------------------------
+# Create RegExp Example for Email on classpath
+# --------------------------------------------
+REGEXP_LOCATION="/ner/org/apache/tika/parser/ner/regex"
+mkdir -p $REGEXP_LOCATION
+echo 
"EMAIL=(?:[a-z0-9!#$%&'*+/=?^_\`{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_\`{|}~-]+)*|\"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*\")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])"
 > $REGEXP_LOCATION/ner-regex.txt
+
+
+# -------------------
+# Now run Tika Server
+# -------------------
+
+# Can be a single implementation or a comma-separated list of implementations for the "ner.impl.class" property
+RECOGNISERS=org.apache.tika.parser.ner.opennlp.OpenNLPNERecogniser,org.apache.tika.parser.ner.regex.RegexNERecogniser
+# Set classpath to the Tika Server JAR and the /ner folder so it has the 
configuration and models from above
+CLASSPATH="/ner:/tika-server-standard-${TIKA_VERSION}.jar:/tika-extras/*"
+# Run the server with the custom configuration ner.impl.class property and 
custom /ner/tika-config.xml
+exec java -Dner.impl.class=$RECOGNISERS -cp $CLASSPATH 
org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 -c /ner/tika-config.xml
\ No newline at end of file
diff --git a/tika-server/docker-build/sample-configs/ner/tika-config.xml 
b/tika-server/docker-build/sample-configs/ner/tika-config.xml
new file mode 100644
index 0000000000..65d5774c22
--- /dev/null
+++ b/tika-server/docker-build/sample-configs/ner/tika-config.xml
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<properties>
+    <parsers>
+        <parser class="org.apache.tika.parser.ner.NamedEntityParser">
+            <mime>application/pdf</mime>
+            <mime>text/plain</mime>
+            <mime>text/html</mime>
+            <mime>application/xhtml+xml</mime>
+        </parser>
+    </parsers>
+</properties>
+
diff --git 
a/tika-server/docker-build/sample-configs/vision/inception-rest-caption.xml 
b/tika-server/docker-build/sample-configs/vision/inception-rest-caption.xml
new file mode 100644
index 0000000000..c70c207b28
--- /dev/null
+++ b/tika-server/docker-build/sample-configs/vision/inception-rest-caption.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<properties>
+    <parsers>
+        <parser 
class="org.apache.tika.parser.recognition.ObjectRecognitionParser">
+            <mime>image/jpeg</mime>
+            <mime>image/png</mime>
+            <mime>image/gif</mime>
+            <params>
+                <param name="apiBaseUri" 
type="uri">http://inception-caption:8764/inception/v3</param>
+                <param name="captions" type="int">5</param>
+                <param name="maxCaptionLength" type="int">15</param>
+                <param name="class" 
type="string">org.apache.tika.parser.captioning.tf.TensorflowRESTCaptioner</param>
+            </params>
+        </parser>
+    </parsers>
+</properties>
\ No newline at end of file
diff --git 
a/tika-server/docker-build/sample-configs/vision/inception-rest-video.xml 
b/tika-server/docker-build/sample-configs/vision/inception-rest-video.xml
new file mode 100644
index 0000000000..f6a4e6a938
--- /dev/null
+++ b/tika-server/docker-build/sample-configs/vision/inception-rest-video.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<properties>
+    <parsers>
+        <parser 
class="org.apache.tika.parser.recognition.ObjectRecognitionParser">
+            <mime>video/mp4</mime>
+            <mime>video/quicktime</mime>
+            <params>
+                <param name="apiBaseUri" 
type="uri">http://inception-video:8764/inception/v4</param>
+                <param name="topN" type="int">4</param>
+                <param name="minConfidence" type="double">0.015</param>
+                <param name="mode" type="string">fixed</param>
+                <param name="class" 
type="string">org.apache.tika.parser.recognition.tf.TensorflowRESTVideoRecogniser</param>
+            </params>
+        </parser>
+    </parsers>
+</properties>
\ No newline at end of file
diff --git a/tika-server/docker-build/sample-configs/vision/inception-rest.xml 
b/tika-server/docker-build/sample-configs/vision/inception-rest.xml
new file mode 100644
index 0000000000..caa6468595
--- /dev/null
+++ b/tika-server/docker-build/sample-configs/vision/inception-rest.xml
@@ -0,0 +1,32 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  ~ Licensed to the Apache Software Foundation (ASF) under one or more
+  ~ contributor license agreements.  See the NOTICE file distributed with
+  ~ this work for additional information regarding copyright ownership.
+  ~ The ASF licenses this file to You under the Apache License, Version 2.0
+  ~ (the "License"); you may not use this file except in compliance with
+  ~ the License.  You may obtain a copy of the License at
+  ~
+  ~    http://www.apache.org/licenses/LICENSE-2.0
+  ~
+  ~ Unless required by applicable law or agreed to in writing, software
+  ~ distributed under the License is distributed on an "AS IS" BASIS,
+  ~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  ~ See the License for the specific language governing permissions and
+  ~ limitations under the License.
+  -->
+<properties>
+    <parsers>
+        <parser 
class="org.apache.tika.parser.recognition.ObjectRecognitionParser">
+            <mime>image/jpeg</mime>
+            <mime>image/png</mime>
+            <mime>image/gif</mime>
+            <params>
+                <param name="apiBaseUri" 
type="uri">http://inception-rest:8764/inception/v4</param>
+                <param name="topN" type="int">2</param>
+                <param name="minConfidence" type="double">0.015</param>
+                <param name="class" 
type="string">org.apache.tika.parser.recognition.tf.TensorflowRESTRecogniser</param>
+            </params>
+        </parser>
+    </parsers>
+</properties>

Reply via email to