This is an automated email from the ASF dual-hosted git repository.
yihua pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 3d32596a741d fix(docker): tag base image per Java version to avoid latest collision (#18663)
3d32596a741d is described below
commit 3d32596a741d6d4012da3202eb545745b84cb4d5
Author: voonhous <[email protected]>
AuthorDate: Sat May 16 04:36:11 2026 +0800
fix(docker): tag base image per Java version to avoid latest collision (#18663)
---
docker/README.md | 34 +++++++++++++++++++++------
docker/build_docker_images.sh | 6 ++++-
docker/hoodie/hadoop/datanode/Dockerfile | 5 ++--
docker/hoodie/hadoop/historyserver/Dockerfile | 5 ++--
docker/hoodie/hadoop/hive_base/Dockerfile | 5 ++--
docker/hoodie/hadoop/namenode/Dockerfile | 5 ++--
docker/hoodie/hadoop/prestobase/Dockerfile | 3 ++-
docker/hoodie/hadoop/trinobase/Dockerfile | 3 ++-
8 files changed, 48 insertions(+), 18 deletions(-)
diff --git a/docker/README.md b/docker/README.md
index 718d1943ef7e..f655f42dca8b 100644
--- a/docker/README.md
+++ b/docker/README.md
@@ -27,6 +27,22 @@ docker demo environment.
The `/hoodie` folder contains all the configs for assembling necessary docker images. The name and repository of each
docker image, e.g., `apachehudi/hudi-hadoop_2.8.4-trinobase_368`, is defined in the maven configuration file `pom.xml`.
+### Base images by Java version
+
+`build_docker_images.sh` auto-selects one of the two supported base images from `--spark-version`:
+
+| Base module | JDK | Used for |
+|---------------|---------|------------|
+| `base_java11` | Java 11 | Spark 3.x |
+| `base_java17` | Java 17 | Spark 4.0+ |
+
+The legacy Java 8 `base` module under `/hoodie/hadoop/base` is retained for historical reference only; Spark 2.x is no
+longer supported and `build_docker_images.sh` never selects it.
+
+Downstream Dockerfiles (`datanode`, `historyserver`, `hive_base`, `namenode`, `prestobase`, `trinobase`) pick the base
+via the `BASE_IMAGE_TAG` build arg (default `java11`). `build_docker_images.sh` sets it automatically; bare `docker
+build` invocations targeting the Java 17 base must pass `--build-arg BASE_IMAGE_TAG=java17`.
+
### Docker compose config for the Demo - `/compose`
The `/compose` folder contains the yaml file to compose the Docker environment for running Hudi Demo.
@@ -150,19 +166,23 @@ push the image to the dockerhub repo:
# Run under hoodie/hadoop, the <tag> is optional, "latest" by default
docker buildx build <image_folder_name> --platform <comma-separated,platforms> -t <hub-user>/<repo-name>[:<tag>] --push
-# For example, to build base image
-docker buildx build base --platform linux/arm64 -t apachehudi/hudi-hadoop_2.8.4-base:linux-arm64-0.10.1 --push
+# For example, to build the Java 11 base image
+docker buildx build base_java11 --platform linux/arm64 -t apachehudi/hudi-hadoop_2.8.4-base-java11:linux-arm64-0.10.1 --push
```
+Note: the base image is now tagged per Java variant (`-base-java11` / `-base-java17`). Downstream Dockerfiles
+select the variant via the `BASE_IMAGE_TAG` build arg (default `java11`). If you also need the Java 17 base for
+arm64, repeat the build against `base_java17` and tag it as `...-base-java17:<tag>`.
+
Once the base image is pushed then you could do something similar for other images.
Change [hive](./hoodie/hadoop/hive_base/Dockerfile) dockerfile to pull the base image with tag corresponding to
linux/arm64 platform.
```
# Change below line in the Dockerfile
-FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest
-# as shown below
-FROM --platform=linux/arm64 apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:linux-arm64-0.10.1
+FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-${BASE_IMAGE_TAG}:latest
+# as shown below (pin to the same Java variant you built above, e.g. java11)
+FROM --platform=linux/arm64 apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-java11:linux-arm64-0.10.1
# and then build & push from under hoodie/hadoop dir
docker buildx build hive_base --platform linux/arm64 -t apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:linux-arm64-0.10.1 --push
@@ -178,8 +198,8 @@ shows what changes to make in Dockerfiles (assuming tag is named `linux-arm64-0.
of `docker buildx` commands.
```
-docker buildx build base --platform linux/arm64 -t apachehudi/hudi-hadoop_2.8.4-base:linux-arm64-0.10.1 --push
-docker buildx build datanode --platform linux/arm64 -t apachehudi/hudi-hadoop_2.8.4-datanode:linux-arm64-0.10.1 --push
+docker buildx build base_java11 --platform linux/arm64 -t apachehudi/hudi-hadoop_2.8.4-base-java11:linux-arm64-0.10.1 --push
+docker buildx build datanode --platform linux/arm64 --build-arg BASE_IMAGE_TAG=java11 -t apachehudi/hudi-hadoop_2.8.4-datanode:linux-arm64-0.10.1 --push
docker buildx build historyserver --platform linux/arm64 -t apachehudi/hudi-hadoop_2.8.4-history:linux-arm64-0.10.1 --push
docker buildx build hive_base --platform linux/arm64 -t apachehudi/hudi-hadoop_2.8.4-hive_2.3.3:linux-arm64-0.10.1 --push
docker buildx build namenode --platform linux/arm64 -t apachehudi/hudi-hadoop_2.8.4-namenode:linux-arm64-0.10.1 --push
diff --git a/docker/build_docker_images.sh b/docker/build_docker_images.sh
index 5756f87d7a1a..bb613a2a5155 100755
--- a/docker/build_docker_images.sh
+++ b/docker/build_docker_images.sh
@@ -95,16 +95,18 @@ DOCKER_CONTEXT_DIR="hoodie/hadoop"
SPARK_MAJOR=$(echo "$SPARK_VERSION" | cut -d. -f1)
if [ "$SPARK_MAJOR" -ge 4 ] 2>/dev/null; then
BASE_IMAGE_DIR="base_java17"
+ BASE_JAVA_TAG="java17"
echo "Using Java 17 base image for Spark ${SPARK_VERSION}"
else
BASE_IMAGE_DIR="base_java11"
+ BASE_JAVA_TAG="java11"
echo "Using Java 11 base image for Spark ${SPARK_VERSION}"
fi
# List of images to build: "subdir|image_base_name"
# Each entry: <subdir>|<image_base_name>
DOCKER_IMAGES=(
- "${BASE_IMAGE_DIR}|apachehudi/hudi-hadoop_${HADOOP_VERSION}-base"
+ "${BASE_IMAGE_DIR}|apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-${BASE_JAVA_TAG}"
"datanode|apachehudi/hudi-hadoop_${HADOOP_VERSION}-datanode"
"historyserver|apachehudi/hudi-hadoop_${HADOOP_VERSION}-history"
"hive_base|apachehudi/hudi-hadoop_${HADOOP_VERSION}-hive_${HIVE_VERSION}"
@@ -128,6 +130,7 @@ for IMAGE_CONFIG in "${DOCKER_IMAGES[@]}"; do
--build-arg HADOOP_VERSION=${HADOOP_VERSION} \
--build-arg SPARK_VERSION=${SPARK_VERSION} \
--build-arg HIVE_VERSION=${HIVE_VERSION} \
+ --build-arg BASE_IMAGE_TAG=${BASE_JAVA_TAG} \
"$IMAGE_CONTEXT" -t "$TAG_LATEST" -t "$TAG_VERSIONED"; then
echo "Error: Failed to build docker image for $IMAGE_CONTEXT"
exit 1
@@ -137,6 +140,7 @@ for IMAGE_CONFIG in "${DOCKER_IMAGES[@]}"; do
--build-arg HADOOP_VERSION=${HADOOP_VERSION} \
--build-arg SPARK_VERSION=${SPARK_VERSION} \
--build-arg HIVE_VERSION=${HIVE_VERSION} \
+ --build-arg BASE_IMAGE_TAG=${BASE_JAVA_TAG} \
"$IMAGE_CONTEXT" -t "$TAG_LATEST" -t "$TAG_VERSIONED"; then
echo "Error: Failed to build docker image for $IMAGE_CONTEXT"
exit 1
diff --git a/docker/hoodie/hadoop/datanode/Dockerfile b/docker/hoodie/hadoop/datanode/Dockerfile
index 61297bd3b6bb..bc157214f182 100644
--- a/docker/hoodie/hadoop/datanode/Dockerfile
+++ b/docker/hoodie/hadoop/datanode/Dockerfile
@@ -15,9 +15,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-ARG HADOOP_VERSION=3.3.4
+ARG HADOOP_VERSION=3.3.4
ARG HADOOP_DN_PORT=50075
-FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest
+ARG BASE_IMAGE_TAG=java11
+FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-${BASE_IMAGE_TAG}:latest
ENV HADOOP_DN_PORT ${HADOOP_DN_PORT}
diff --git a/docker/hoodie/hadoop/historyserver/Dockerfile b/docker/hoodie/hadoop/historyserver/Dockerfile
index f001b511d94a..0c77188e3e51 100644
--- a/docker/hoodie/hadoop/historyserver/Dockerfile
+++ b/docker/hoodie/hadoop/historyserver/Dockerfile
@@ -15,9 +15,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-ARG HADOOP_VERSION=3.3.4
+ARG HADOOP_VERSION=3.3.4
ARG HADOOP_HISTORY_PORT=8188
-FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest
+ARG BASE_IMAGE_TAG=java11
+FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-${BASE_IMAGE_TAG}:latest
# Install unzip and wget
RUN apt-get update && \
diff --git a/docker/hoodie/hadoop/hive_base/Dockerfile b/docker/hoodie/hadoop/hive_base/Dockerfile
index 98d26895c077..f77c4c4e455e 100644
--- a/docker/hoodie/hadoop/hive_base/Dockerfile
+++ b/docker/hoodie/hadoop/hive_base/Dockerfile
@@ -15,8 +15,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-ARG HADOOP_VERSION=3.3.4
-FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest
+ARG HADOOP_VERSION=3.3.4
+ARG BASE_IMAGE_TAG=java11
+FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-${BASE_IMAGE_TAG}:latest
ENV HIVE_HOME /opt/hive
ENV PATH $HIVE_HOME/bin:$PATH
diff --git a/docker/hoodie/hadoop/namenode/Dockerfile b/docker/hoodie/hadoop/namenode/Dockerfile
index 776d03eb6670..33e2ab4b9955 100644
--- a/docker/hoodie/hadoop/namenode/Dockerfile
+++ b/docker/hoodie/hadoop/namenode/Dockerfile
@@ -15,9 +15,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-ARG HADOOP_VERSION=3.3.4
+ARG HADOOP_VERSION=3.3.4
ARG HADOOP_WEBHDFS_PORT=50070
-FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest
+ARG BASE_IMAGE_TAG=java11
+FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-${BASE_IMAGE_TAG}:latest
ENV HADOOP_WEBHDFS_PORT ${HADOOP_WEBHDFS_PORT}
diff --git a/docker/hoodie/hadoop/prestobase/Dockerfile b/docker/hoodie/hadoop/prestobase/Dockerfile
index accedb94db3d..d40aa9c8f273 100644
--- a/docker/hoodie/hadoop/prestobase/Dockerfile
+++ b/docker/hoodie/hadoop/prestobase/Dockerfile
@@ -20,7 +20,8 @@
ARG HADOOP_VERSION=2.8.4
ARG HIVE_VERSION=2.3.3
-FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base:latest as hadoop-base
+ARG BASE_IMAGE_TAG=java11
+FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-${BASE_IMAGE_TAG}:latest as hadoop-base
ARG PRESTO_VERSION=0.271
diff --git a/docker/hoodie/hadoop/trinobase/Dockerfile b/docker/hoodie/hadoop/trinobase/Dockerfile
index 9d7c23010fbb..0700fa2f6bfb 100644
--- a/docker/hoodie/hadoop/trinobase/Dockerfile
+++ b/docker/hoodie/hadoop/trinobase/Dockerfile
@@ -20,7 +20,8 @@
ARG HADOOP_VERSION=2.8.4
ARG HIVE_VERSION=2.3.3
-FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-java11:latest as hadoop-base
+ARG BASE_IMAGE_TAG=java11
+FROM apachehudi/hudi-hadoop_${HADOOP_VERSION}-base-${BASE_IMAGE_TAG}:latest as hadoop-base
ENV TRINO_VERSION=368
ENV TRINO_HOME=/usr/local/trino