This is an automated email from the ASF dual-hosted git repository.
xushiyan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git
The following commit(s) were added to refs/heads/master by this push:
new 4689578f77a3 fix: build notebook hive image using compatible mode for arm64 (#14190)
4689578f77a3 is described below
commit 4689578f77a3172a8706fb6ad9b7ba55595b34be
Author: Shiyan Xu <[email protected]>
AuthorDate: Thu Oct 30 13:34:47 2025 -0700
fix: build notebook hive image using compatible mode for arm64 (#14190)
---
hudi-notebooks/Dockerfile.hive | 5 +++--
hudi-notebooks/build.sh | 4 +++-
hudi-notebooks/docker-compose.yml | 1 +
hudi-notebooks/notebooks/utils.py | 7 ++++---
hudi-notebooks/run_spark_hudi.sh | 2 +-
5 files changed, 12 insertions(+), 7 deletions(-)
diff --git a/hudi-notebooks/Dockerfile.hive b/hudi-notebooks/Dockerfile.hive
index 4db708947447..0db9708d78cd 100644
--- a/hudi-notebooks/Dockerfile.hive
+++ b/hudi-notebooks/Dockerfile.hive
@@ -15,8 +15,9 @@
# limitations under the License.
ARG HIVE_VERSION=${HIVE_VERSION:-3.1.3}
+ARG TARGET_PLATFORM=${TARGET_PLATFORM:-linux/amd64}
-FROM apache/hive:$HIVE_VERSION
+FROM --platform=$TARGET_PLATFORM apache/hive:$HIVE_VERSION
USER root
@@ -28,4 +29,4 @@ COPY conf/hive/metastore-site.xml $HIVE_HOME/conf/metastore-site.xml
RUN mkdir -p $HIVE_HOME/.beeline && \
ln -s $HADOOP_HOME/share/hadoop/tools/lib/hadoop-aws-3.1.0.jar
$HIVE_HOME/lib/hadoop-aws.jar && \
ln -s $HADOOP_HOME/share/hadoop/tools/lib/aws-java-sdk-bundle-1.11.271.jar
$HIVE_HOME/lib/aws-java-sdk.jar && \
- sed -i
'/<name>hive.execution.engine<\/name>/{n;s|<value>.*</value>|<value>mr</value>|}'
"$HIVE_HOME/conf/hive-site.xml"
\ No newline at end of file
+ sed -i
'/<name>hive.execution.engine<\/name>/{n;s|<value>.*</value>|<value>mr</value>|}'
"$HIVE_HOME/conf/hive-site.xml"
diff --git a/hudi-notebooks/build.sh b/hudi-notebooks/build.sh
index 83847f62c29e..2a103e9377a9 100644
--- a/hudi-notebooks/build.sh
+++ b/hudi-notebooks/build.sh
@@ -36,7 +36,9 @@ docker build \
echo "Building Hive Docker image using Hive version: $HIVE_VERSION"
-docker build \
+export TARGET_PLATFORM=linux/amd64
+docker buildx build \
+ --platform $TARGET_PLATFORM \
--build-arg HIVE_VERSION="$HIVE_VERSION" \
-t apachehudi/hive:latest \
-t apachehudi/hive:"$HIVE_VERSION_TAG" \
diff --git a/hudi-notebooks/docker-compose.yml b/hudi-notebooks/docker-compose.yml
index 626c01d519b6..8ec48d1de25e 100644
--- a/hudi-notebooks/docker-compose.yml
+++ b/hudi-notebooks/docker-compose.yml
@@ -80,6 +80,7 @@ services:
hive-metastore:
image: apachehudi/hive:latest
+ platform: linux/amd64
container_name: hive-metastore
command: /opt/hive/bin/hive --service metastore
environment:
diff --git a/hudi-notebooks/notebooks/utils.py b/hudi-notebooks/notebooks/utils.py
index 007761ebe53d..5318a1340cef 100644
--- a/hudi-notebooks/notebooks/utils.py
+++ b/hudi-notebooks/notebooks/utils.py
@@ -19,12 +19,13 @@ from IPython.display import display as display_html, HTML
import boto3
from urllib.parse import urlparse
-def get_spark_session(app_name="Hudi-Notebooks"):
+def get_spark_session(app_name="Hudi-Notebooks", log_level="WARN"):
"""
Initialize a SparkSession
Parameters:
- app_name (str): Optional name for the Spark application.
+ - log_level (str): Log level for Spark (DEBUG, INFO, WARN, ERROR).
Defaults to WARN.
Returns:
- SparkSession object
@@ -33,11 +34,11 @@ def get_spark_session(app_name="Hudi-Notebooks"):
spark_session = SparkSession.builder \
.appName(app_name) \
.config("spark.hadoop.fs.defaultFS", "s3a://warehouse") \
+ .config("spark.log.level", log_level) \
.enableHiveSupport() \
.getOrCreate()
- spark_session.sparkContext.setLogLevel("ERROR")
- print(f"SparkSession started with app name: {app_name}")
+ print(f"SparkSession started with app name: {app_name}, log level:
{log_level}")
return spark_session
diff --git a/hudi-notebooks/run_spark_hudi.sh b/hudi-notebooks/run_spark_hudi.sh
index 4cda555a71a5..618737a3273c 100644
--- a/hudi-notebooks/run_spark_hudi.sh
+++ b/hudi-notebooks/run_spark_hudi.sh
@@ -49,4 +49,4 @@ case "$state" in
*)
echo "Usage: $0 {start|stop|restart}"
exit 1
-esac
\ No newline at end of file
+esac