This is an automated email from the ASF dual-hosted git repository.

xushiyan pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hudi.git


The following commit(s) were added to refs/heads/master by this push:
     new 4689578f77a3 fix: build notebook hive image using compatible mode for arm64 (#14190)
4689578f77a3 is described below

commit 4689578f77a3172a8706fb6ad9b7ba55595b34be
Author: Shiyan Xu <[email protected]>
AuthorDate: Thu Oct 30 13:34:47 2025 -0700

    fix: build notebook hive image using compatible mode for arm64 (#14190)
---
 hudi-notebooks/Dockerfile.hive    | 5 +++--
 hudi-notebooks/build.sh           | 4 +++-
 hudi-notebooks/docker-compose.yml | 1 +
 hudi-notebooks/notebooks/utils.py | 7 ++++---
 hudi-notebooks/run_spark_hudi.sh  | 2 +-
 5 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/hudi-notebooks/Dockerfile.hive b/hudi-notebooks/Dockerfile.hive
index 4db708947447..0db9708d78cd 100644
--- a/hudi-notebooks/Dockerfile.hive
+++ b/hudi-notebooks/Dockerfile.hive
@@ -15,8 +15,9 @@
 # limitations under the License.
 
 ARG HIVE_VERSION=${HIVE_VERSION:-3.1.3}
+ARG TARGET_PLATFORM=${TARGET_PLATFORM:-linux/amd64}
 
-FROM apache/hive:$HIVE_VERSION
+FROM --platform=$TARGET_PLATFORM apache/hive:$HIVE_VERSION
 
 USER root
 
@@ -28,4 +29,4 @@ COPY conf/hive/metastore-site.xml $HIVE_HOME/conf/metastore-site.xml
 RUN mkdir -p $HIVE_HOME/.beeline && \
     ln -s $HADOOP_HOME/share/hadoop/tools/lib/hadoop-aws-3.1.0.jar $HIVE_HOME/lib/hadoop-aws.jar && \
     ln -s $HADOOP_HOME/share/hadoop/tools/lib/aws-java-sdk-bundle-1.11.271.jar $HIVE_HOME/lib/aws-java-sdk.jar && \
-    sed -i '/<name>hive.execution.engine<\/name>/{n;s|<value>.*</value>|<value>mr</value>|}' "$HIVE_HOME/conf/hive-site.xml"
\ No newline at end of file
+    sed -i '/<name>hive.execution.engine<\/name>/{n;s|<value>.*</value>|<value>mr</value>|}' "$HIVE_HOME/conf/hive-site.xml"
diff --git a/hudi-notebooks/build.sh b/hudi-notebooks/build.sh
index 83847f62c29e..2a103e9377a9 100644
--- a/hudi-notebooks/build.sh
+++ b/hudi-notebooks/build.sh
@@ -36,7 +36,9 @@ docker build \
 
 echo "Building Hive Docker image using Hive version: $HIVE_VERSION"
 
-docker build \
+export TARGET_PLATFORM=linux/amd64
+docker buildx build \
+    --platform $TARGET_PLATFORM \
     --build-arg HIVE_VERSION="$HIVE_VERSION" \
     -t apachehudi/hive:latest \
     -t apachehudi/hive:"$HIVE_VERSION_TAG" \
diff --git a/hudi-notebooks/docker-compose.yml b/hudi-notebooks/docker-compose.yml
index 626c01d519b6..8ec48d1de25e 100644
--- a/hudi-notebooks/docker-compose.yml
+++ b/hudi-notebooks/docker-compose.yml
@@ -80,6 +80,7 @@ services:
 
   hive-metastore:
     image: apachehudi/hive:latest
+    platform: linux/amd64
     container_name: hive-metastore
     command: /opt/hive/bin/hive --service metastore
     environment:
diff --git a/hudi-notebooks/notebooks/utils.py b/hudi-notebooks/notebooks/utils.py
index 007761ebe53d..5318a1340cef 100644
--- a/hudi-notebooks/notebooks/utils.py
+++ b/hudi-notebooks/notebooks/utils.py
@@ -19,12 +19,13 @@ from IPython.display import display as display_html, HTML
 import boto3
 from urllib.parse import urlparse
 
-def get_spark_session(app_name="Hudi-Notebooks"):
+def get_spark_session(app_name="Hudi-Notebooks", log_level="WARN"):
     """
     Initialize a SparkSession
     
     Parameters:
     - app_name (str): Optional name for the Spark application.
+    - log_level (str): Log level for Spark (DEBUG, INFO, WARN, ERROR). Defaults to WARN.
     
     Returns:
     - SparkSession object
@@ -33,11 +34,11 @@ def get_spark_session(app_name="Hudi-Notebooks"):
     spark_session = SparkSession.builder \
         .appName(app_name) \
            .config("spark.hadoop.fs.defaultFS", "s3a://warehouse") \
+        .config("spark.log.level", log_level) \
         .enableHiveSupport() \
         .getOrCreate()
         
-    spark_session.sparkContext.setLogLevel("ERROR")
-    print(f"SparkSession started with app name: {app_name}")
+    print(f"SparkSession started with app name: {app_name}, log level: {log_level}")
     
     return spark_session
 
diff --git a/hudi-notebooks/run_spark_hudi.sh b/hudi-notebooks/run_spark_hudi.sh
index 4cda555a71a5..618737a3273c 100644
--- a/hudi-notebooks/run_spark_hudi.sh
+++ b/hudi-notebooks/run_spark_hudi.sh
@@ -49,4 +49,4 @@ case "$state" in
   *)
     echo "Usage: $0 {start|stop|restart}"
     exit 1
-esac
\ No newline at end of file
+esac

Reply via email to