This is an automated email from the ASF dual-hosted git repository.

ndipiazza pushed a commit to branch TIKA-4703-docker-ci
in repository https://gitbox.apache.org/repos/asf/tika.git

commit 02d061bf89da22cfc7cc9695c5527d099efe34d9
Author: Nicholas DiPiazza <[email protected]>
AuthorDate: Fri Mar 27 12:32:33 2026 -0500

    TIKA-4703: Fix Docker builds found during local testing
    
    - tika-server snapshot Dockerfiles: use assembly tgz (thin JAR + lib/)
      instead of the thin JAR alone, matching the 4.x packaging model
    - tika-grpc: bundle default-tika-config.json so the server starts
      without requiring a config volume mount
    - tika-grpc: pass -c, -p, and --plugin-roots as CLI args instead of
      system properties so TikaGrpcServer actually picks them up
    - tika-grpc: default port is now 9090 (configurable via TIKA_GRPC_PORT)
    
    Tested locally: all three images (minimal, full, grpc) build and start
    successfully.
    
    Co-Authored-By: Claude Opus 4.6 (1M context) <[email protected]>
---
 .github/workflows/docker-release.yml               |  1 +
 .github/workflows/docker-snapshot.yml              |  9 ++++---
 tika-grpc/docker-build/default-tika-config.json    | 20 +++++++++++++++
 tika-grpc/docker-build/docker-build.sh             |  1 +
 tika-grpc/docker-build/start-tika-grpc.sh          | 29 +++++++++++-----------
 tika-server/docker-build/full/Dockerfile.snapshot  |  6 ++---
 .../docker-build/minimal/Dockerfile.snapshot       |  6 ++---
 7 files changed, 48 insertions(+), 24 deletions(-)

diff --git a/.github/workflows/docker-release.yml b/.github/workflows/docker-release.yml
index a412c2a061..d2a3403e47 100644
--- a/.github/workflows/docker-release.yml
+++ b/.github/workflows/docker-release.yml
@@ -139,6 +139,7 @@ jobs:
           done
 
           cp "tika-grpc/docker-build/start-tika-grpc.sh" "${OUT_DIR}/bin/"
+          cp "tika-grpc/docker-build/default-tika-config.json" "${OUT_DIR}/config/"
           cp "tika-grpc/docker-build/Dockerfile" "${OUT_DIR}/Dockerfile"
 
       - name: Build and push tika-grpc
diff --git a/.github/workflows/docker-snapshot.yml b/.github/workflows/docker-snapshot.yml
index 315cba2da4..28998e8f5f 100644
--- a/.github/workflows/docker-snapshot.yml
+++ b/.github/workflows/docker-snapshot.yml
@@ -65,8 +65,8 @@ jobs:
         run: |
           TIKA_VERSION="${{ steps.version.outputs.tika_version }}"
           OUT_DIR=target/tika-server-minimal-docker
-          mkdir -p "${OUT_DIR}"
-          cp "tika-server/tika-server-standard/target/tika-server-standard-${TIKA_VERSION}.jar" "${OUT_DIR}/"
+          mkdir -p "${OUT_DIR}/tika-server"
+          tar xzf "tika-server/tika-server-standard/target/tika-server-standard-${TIKA_VERSION}-bin.tgz" -C "${OUT_DIR}/tika-server"
           cp "tika-server/docker-build/minimal/Dockerfile.snapshot" "${OUT_DIR}/Dockerfile"
 
       - name: Build and push tika-server minimal snapshot
@@ -85,8 +85,8 @@ jobs:
         run: |
           TIKA_VERSION="${{ steps.version.outputs.tika_version }}"
           OUT_DIR=target/tika-server-full-docker
-          mkdir -p "${OUT_DIR}"
-          cp "tika-server/tika-server-standard/target/tika-server-standard-${TIKA_VERSION}.jar" "${OUT_DIR}/"
+          mkdir -p "${OUT_DIR}/tika-server"
+          tar xzf "tika-server/tika-server-standard/target/tika-server-standard-${TIKA_VERSION}-bin.tgz" -C "${OUT_DIR}/tika-server"
           cp "tika-server/docker-build/full/Dockerfile.snapshot" "${OUT_DIR}/Dockerfile"
 
       - name: Build and push tika-server full snapshot
@@ -133,6 +133,7 @@ jobs:
           done
 
           cp "tika-grpc/docker-build/start-tika-grpc.sh" "${OUT_DIR}/bin/"
+          cp "tika-grpc/docker-build/default-tika-config.json" "${OUT_DIR}/config/"
           cp "tika-grpc/docker-build/Dockerfile" "${OUT_DIR}/Dockerfile"
 
       - name: Build and push tika-grpc snapshot
diff --git a/tika-grpc/docker-build/default-tika-config.json b/tika-grpc/docker-build/default-tika-config.json
new file mode 100644
index 0000000000..000bb01812
--- /dev/null
+++ b/tika-grpc/docker-build/default-tika-config.json
@@ -0,0 +1,20 @@
+{
+  "fetchers": [
+    {
+      "fs": {
+        "defaultFetcher": {
+          "basePath": "/data/input"
+        }
+      }
+    }
+  ],
+  "emitters": [
+    {
+      "fs": {
+        "defaultEmitter": {
+          "basePath": "/data/output"
+        }
+      }
+    }
+  ]
+}
diff --git a/tika-grpc/docker-build/docker-build.sh b/tika-grpc/docker-build/docker-build.sh
index c522ec04fa..9ce5daa928 100755
--- a/tika-grpc/docker-build/docker-build.sh
+++ b/tika-grpc/docker-build/docker-build.sh
@@ -81,6 +81,7 @@ for parser_package in "${parser_packages[@]}"; do
 done
 
 cp -v -r "tika-grpc/docker-build/start-tika-grpc.sh" "${OUT_DIR}/bin"
+cp -v "tika-grpc/docker-build/default-tika-config.json" "${OUT_DIR}/config"
 cp -v "tika-grpc/docker-build/Dockerfile" "${OUT_DIR}/Dockerfile"
 
 cd "${OUT_DIR}" || exit
diff --git a/tika-grpc/docker-build/start-tika-grpc.sh b/tika-grpc/docker-build/start-tika-grpc.sh
index c42c953d7b..919a51afcc 100755
--- a/tika-grpc/docker-build/start-tika-grpc.sh
+++ b/tika-grpc/docker-build/start-tika-grpc.sh
@@ -12,21 +12,19 @@
 # License for the specific language governing permissions and limitations under
 # the License.
 
-echo "Tika Version:"
-echo "${TIKA_VERSION}"
+# Use user-provided config or fall back to the bundled default
+TIKA_CONFIG="${TIKA_CONFIG:-/tika/config/default-tika-config.json}"
+
+echo "Tika Version: ${TIKA_VERSION}"
+echo "Tika Config: ${TIKA_CONFIG}"
 echo "Tika Plugins:"
 ls "/tika/plugins"
-echo "Tika gRPC Max Inbound Message Size:"
-echo "${TIKA_GRPC_MAX_INBOUND_MESSAGE_SIZE}"
-echo "Tika gRPC Max Outbound Message Size:"
-echo "${TIKA_GRPC_MAX_OUTBOUND_MESSAGE_SIZE}"
-echo "Tika gRPC Num Threads:"
-echo "${TIKA_GRPC_NUM_THREADS}"
+echo "Tika gRPC Max Inbound Message Size: ${TIKA_GRPC_MAX_INBOUND_MESSAGE_SIZE}"
+echo "Tika gRPC Max Outbound Message Size: ${TIKA_GRPC_MAX_OUTBOUND_MESSAGE_SIZE}"
+echo "Tika gRPC Num Threads: ${TIKA_GRPC_NUM_THREADS}"
+TIKA_GRPC_PORT="${TIKA_GRPC_PORT:-9090}"
+
 exec java \
-  -Dgrpc.server.port=9090 \
-  "-Dgrpc.server.max-inbound-message-size=${TIKA_GRPC_MAX_INBOUND_MESSAGE_SIZE}" \
-  "-Dgrpc.server.max-outbound-message-size=${TIKA_GRPC_MAX_OUTBOUND_MESSAGE_SIZE}" \
-  "-Dgrpc.server.numThreads=${TIKA_GRPC_NUM_THREADS}" \
   --add-opens=jdk.management/com.sun.management.internal=ALL-UNNAMED \
   --add-opens=java.base/jdk.internal.misc=ALL-UNNAMED \
   --add-opens=java.base/sun.nio.ch=ALL-UNNAMED \
@@ -38,5 +36,8 @@ exec java \
   --add-opens=java.base/java.util=ALL-UNNAMED \
   --add-opens=java.base/java.lang=ALL-UNNAMED \
   -Djava.net.preferIPv4Stack=true \
-  "-Dplugins.pluginDirs=/tika/plugins" \
-  -jar "/tika/libs/tika-grpc-${TIKA_VERSION}.jar"
+  -jar "/tika/libs/tika-grpc-${TIKA_VERSION}.jar" \
+  -c "${TIKA_CONFIG}" \
+  -p "${TIKA_GRPC_PORT}" \
+  --plugin-roots "/tika/plugins" \
+  "$@"
diff --git a/tika-server/docker-build/full/Dockerfile.snapshot b/tika-server/docker-build/full/Dockerfile.snapshot
index 8882dc5b90..4f655005e6 100644
--- a/tika-server/docker-build/full/Dockerfile.snapshot
+++ b/tika-server/docker-build/full/Dockerfile.snapshot
@@ -10,7 +10,7 @@
 # License for the specific language governing permissions and limitations under
 # the License.
 
-# Snapshot variant: copies the JAR from the Maven build output rather than
+# Snapshot variant: copies the assembly from the Maven build output rather than
 # downloading from Apache mirrors. Used for nightly/snapshot Docker builds.
 
 ARG UID_GID="35002:35002"
@@ -44,9 +44,9 @@ RUN set -eux \
     && apt-get clean -y \
     && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 ENV TIKA_VERSION=$TIKA_VERSION
-COPY tika-server-standard-${TIKA_VERSION}.jar /tika-server-standard-${TIKA_VERSION}.jar
+COPY tika-server/ /tika-server/
 USER $UID_GID
 EXPOSE 9998
-ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp \"/tika-server-standard-${TIKA_VERSION}.jar:/tika-extras/*\" org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $0 $@"]
+ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp \"/tika-server/tika-server.jar:/tika-server/lib/*:/tika-extras/*\" org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $0 $@"]
 
 LABEL maintainer="Apache Tika Developers [email protected]"
diff --git a/tika-server/docker-build/minimal/Dockerfile.snapshot b/tika-server/docker-build/minimal/Dockerfile.snapshot
index ac6644f345..d701dfee68 100644
--- a/tika-server/docker-build/minimal/Dockerfile.snapshot
+++ b/tika-server/docker-build/minimal/Dockerfile.snapshot
@@ -10,7 +10,7 @@
 # License for the specific language governing permissions and limitations under
 # the License.
 
-# Snapshot variant: copies the JAR from the Maven build output rather than
+# Snapshot variant: copies the assembly from the Maven build output rather than
 # downloading from Apache mirrors. Used for nightly/snapshot Docker builds.
 
 ARG UID_GID="35002:35002"
@@ -26,9 +26,9 @@ RUN set -eux \
         ca-certificates \
     && apt-get clean -y && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
 ENV TIKA_VERSION=$TIKA_VERSION
-COPY tika-server-standard-${TIKA_VERSION}.jar /tika-server-standard-${TIKA_VERSION}.jar
+COPY tika-server/ /tika-server/
 USER $UID_GID
 EXPOSE 9998
-ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp \"/tika-server-standard-${TIKA_VERSION}.jar:/tika-extras/*\" org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $0 $@"]
+ENTRYPOINT [ "/bin/sh", "-c", "exec java -cp \"/tika-server/tika-server.jar:/tika-server/lib/*:/tika-extras/*\" org.apache.tika.server.core.TikaServerCli -h 0.0.0.0 $0 $@"]
 
 LABEL maintainer="Apache Tika Developers [email protected]"

Reply via email to