This is an automated email from the ASF dual-hosted git repository.

yzou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/polaris.git


The following commit(s) were added to refs/heads/main by this push:
     new c5907e668 Add regression test coverage for Spark Client with package conf (#1997)
c5907e668 is described below

commit c5907e66890211811bf28b9c6f2b81c14847d3a0
Author: Yun Zou <[email protected]>
AuthorDate: Thu Jul 3 17:01:37 2025 -0700

    Add regression test coverage for Spark Client with package conf (#1997)
---
 .github/workflows/spark_client_regtests.yml    |  2 +-
 plugins/spark/v3.5/regtests/README.md          |  4 +-
 plugins/spark/v3.5/regtests/docker-compose.yml |  1 +
 plugins/spark/v3.5/regtests/run.sh             | 89 ++++++++++++++++----------
 plugins/spark/v3.5/regtests/setup.sh           | 23 ++++++-
 5 files changed, 81 insertions(+), 38 deletions(-)
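
For orientation, the new PACKAGE mode and the pre-existing JAR mode correspond roughly to the following two spark-shell invocations (a sketch; the Scala suffix, version, and path are illustrative placeholders, not values from this commit):

```shell
# PACKAGE mode: resolve the client from a Maven repository (here the
# locally published artifacts), matching the spark.jars.packages line
# that setup.sh now writes.
spark-shell --packages "org.apache.polaris:polaris-spark-3.5_2.12:<polaris-version>,io.delta:delta-spark_2.12:3.2.1"

# JAR mode: point Spark at the locally built bundle jar, as before.
spark-shell \
  --jars "/path/to/polaris-spark-3.5_2.12-<polaris-version>-bundle.jar" \
  --packages "io.delta:delta-spark_2.12:3.2.1"
```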

diff --git a/.github/workflows/spark_client_regtests.yml b/.github/workflows/spark_client_regtests.yml
index b4da3331e..c74e3f763 100644
--- a/.github/workflows/spark_client_regtests.yml
+++ b/.github/workflows/spark_client_regtests.yml
@@ -47,7 +47,7 @@ jobs:
         run: ./gradlew regeneratePythonClient
 
       - name: Project build without testing
-        run: ./gradlew assemble
+        run: ./gradlew assemble publishToMavenLocal
 
       - name: Image build
         run: |
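
Publishing to the local Maven repository is what makes the new PACKAGE mode work: `publishToMavenLocal` is the standard Gradle `maven-publish` task, and by default it writes artifacts under `~/.m2/repository`. A quick sanity check (illustrative, assuming the default repository location):

```shell
# After the build, the locally published Spark client artifacts
# should be visible under the local Maven repository.
ls ~/.m2/repository/org/apache/polaris/
```
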
diff --git a/plugins/spark/v3.5/regtests/README.md b/plugins/spark/v3.5/regtests/README.md
index 39220352c..06a0ccd13 100755
--- a/plugins/spark/v3.5/regtests/README.md
+++ b/plugins/spark/v3.5/regtests/README.md
@@ -44,7 +44,7 @@ Tests can be run with docker-compose using the provided `./plugins/spark/v3.5/re
 follows:
 
 ```shell
-./gradlew build
+./gradlew assemble publishToMavenLocal
 ./gradlew \
   :polaris-server:assemble \
   :polaris-server:quarkusAppPartsBuild --rerun \
@@ -76,7 +76,7 @@ Regression tests can be run locally as well, using the test harness. For local t
 Scala 2.12 and Scala 2.13 are supported.
 
 To run regression tests locally, run the following:
-- `./gradlew build` -- build the Polaris project and Spark Client jars.
+- `./gradlew assemble publishToMavenLocal` -- build the Polaris project and Spark Client jars, and publish them to the local Maven repository.
 - `./gradlew run` -- start a Polaris server on localhost:8181.
 - `env POLARIS_HOST=localhost ./plugins/spark/v3.5/regtests/run.sh` -- run regtests.
 
diff --git a/plugins/spark/v3.5/regtests/docker-compose.yml b/plugins/spark/v3.5/regtests/docker-compose.yml
index 37fe2afe8..90019a98b 100755
--- a/plugins/spark/v3.5/regtests/docker-compose.yml
+++ b/plugins/spark/v3.5/regtests/docker-compose.yml
@@ -46,4 +46,5 @@ services:
       polaris:
         condition: service_healthy
     volumes:
+      - ~/.m2:/home/spark/.m2
       - ./output:/tmp/polaris-regtests/
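
The new bind mount shares the host's `~/.m2` with the regtest container, so artifacts published by `publishToMavenLocal` are resolvable from inside it. A hypothetical in-container check (the `/home/spark` home directory follows the mount target above):

```shell
# Inside the container, the mounted repository should expose the
# locally published Polaris artifacts.
ls /home/spark/.m2/repository/org/apache/polaris/
```
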
diff --git a/plugins/spark/v3.5/regtests/run.sh b/plugins/spark/v3.5/regtests/run.sh
index 6c19cdcc8..90c2dd755 100755
--- a/plugins/spark/v3.5/regtests/run.sh
+++ b/plugins/spark/v3.5/regtests/run.sh
@@ -68,6 +68,8 @@ fi
 SPARK_MAJOR_VERSION="3.5"
 SPARK_VERSION="3.5.5"
 
+SPARK_SHELL_OPTIONS=("PACKAGE" "JAR")
+
 for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do
   echo "RUN REGRESSION TEST FOR SPARK_MAJOR_VERSION=${SPARK_MAJOR_VERSION}, 
SPARK_VERSION=${SPARK_VERSION}, SCALA_VERSION=${SCALA_VERSION}"
   # find the project jar
@@ -75,44 +77,65 @@ for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do
  JAR_PATH=$(find ${SPARK_DIR} -name "polaris-spark-${SPARK_MAJOR_VERSION}_${SCALA_VERSION}-*.*-bundle.jar" -print -quit)
   echo "find jar ${JAR_PATH}"
 
+  # extract the Polaris version from the bundle jar name
+  JAR_NAME=$(basename "$JAR_PATH")
+  echo "JAR_NAME=${JAR_NAME}"
+  POLARIS_VERSION=$(echo "$JAR_NAME" | sed -n 's/.*-\([0-9][^-]*.*\)-bundle\.jar/\1/p')
+  echo "$POLARIS_VERSION"
+
   SPARK_EXISTS="TRUE"
   if [ -z "${SPARK_HOME}" ]; then
     SPARK_EXISTS="FALSE"
   fi
 
-  source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --jar ${JAR_PATH}
-
-  # run the spark_sql test
-  loginfo "Starting test spark_sql.sh"
-
-  TEST_FILE="spark_sql.sh"
-  TEST_SHORTNAME="spark_sql"
-  TEST_TMPDIR="/tmp/polaris-spark-regtests/${TEST_SHORTNAME}_${SPARK_MAJOR_VERSION}_${SCALA_VERSION}"
-  TEST_STDERR="${TEST_TMPDIR}/${TEST_SHORTNAME}.stderr"
-  TEST_STDOUT="${TEST_TMPDIR}/${TEST_SHORTNAME}.stdout"
-
-  mkdir -p ${TEST_TMPDIR}
-  if (( ${VERBOSE} )); then
-    ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' | tee ${TEST_STDOUT}
-  else
-    ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' > ${TEST_STDOUT}
-  fi
-  loginfo "Test run concluded for ${TEST_SUITE}:${TEST_SHORTNAME}"
-
-  TEST_REF="$(realpath ${SCRIPT_DIR})/${TEST_SHORTNAME}.ref"
-  if cmp --silent ${TEST_STDOUT} ${TEST_REF}; then
-    loggreen "Test SUCCEEDED: ${TEST_SUITE}:${TEST_SHORTNAME}"
-  else
-    logred "Test FAILED: ${TEST_SUITE}:${TEST_SHORTNAME}"
-    echo '#!/bin/bash' > ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
-    echo "meld ${TEST_STDOUT} ${TEST_REF}" >> 
${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
-    chmod 750 ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
-    logred "To compare and fix diffs (if 'meld' installed): 
${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh"
-    logred "Or manually diff: diff ${TEST_STDOUT} ${TEST_REF}"
-    logred "See stderr from test run for additional diagnostics: 
${TEST_STDERR}"
-    diff ${TEST_STDOUT} ${TEST_REF}
-    NUM_FAILURES=$(( NUM_FAILURES + 1 ))
-  fi
+  for SPARK_SHELL_OPTION in "${SPARK_SHELL_OPTIONS[@]}"; do
+    # clean up the default configuration if it exists
+    if [ -d "${SPARK_HOME}" ]; then
+      SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf"
+      if [ -f "${SPARK_CONF}" ]; then
+        rm "${SPARK_CONF}"
+      fi
+    fi
+
+    if [ "${SPARK_SHELL_OPTION}" == "PACKAGE" ]; then
+      # run the setup without jar configuration
+      source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION}
+    else
+      source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} --jar ${JAR_PATH}
+    fi
+
+    # run the spark_sql test
+    loginfo "Starting test spark_sql.sh"
+
+    TEST_FILE="spark_sql.sh"
+    TEST_SHORTNAME="spark_sql"
+    TEST_TMPDIR="/tmp/polaris-spark-regtests/${TEST_SHORTNAME}_${SPARK_MAJOR_VERSION}_${SCALA_VERSION}"
+    TEST_STDERR="${TEST_TMPDIR}/${TEST_SHORTNAME}.stderr"
+    TEST_STDOUT="${TEST_TMPDIR}/${TEST_SHORTNAME}.stdout"
+
+    mkdir -p ${TEST_TMPDIR}
+    if (( ${VERBOSE} )); then
+      ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' | tee ${TEST_STDOUT}
+    else
+      ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' > ${TEST_STDOUT}
+    fi
+    loginfo "Test run concluded for ${TEST_SUITE}:${TEST_SHORTNAME}"
+
+    TEST_REF="$(realpath ${SCRIPT_DIR})/${TEST_SHORTNAME}.ref"
+    if cmp --silent ${TEST_STDOUT} ${TEST_REF}; then
+      loggreen "Test SUCCEEDED: ${TEST_SUITE}:${TEST_SHORTNAME}"
+    else
+      logred "Test FAILED: ${TEST_SUITE}:${TEST_SHORTNAME}"
+      echo '#!/bin/bash' > ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
+      echo "meld ${TEST_STDOUT} ${TEST_REF}" >> 
${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
+      chmod 750 ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
+      logred "To compare and fix diffs (if 'meld' installed): 
${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh"
+      logred "Or manually diff: diff ${TEST_STDOUT} ${TEST_REF}"
+      logred "See stderr from test run for additional diagnostics: 
${TEST_STDERR}"
+      diff ${TEST_STDOUT} ${TEST_REF}
+      NUM_FAILURES=$(( NUM_FAILURES + 1 ))
+    fi
+  done
 
   # clean up
   if [ "${SPARK_EXISTS}" = "FALSE" ]; then
diff --git a/plugins/spark/v3.5/regtests/setup.sh b/plugins/spark/v3.5/regtests/setup.sh
index 072b08f6d..a5f757b3c 100755
--- a/plugins/spark/v3.5/regtests/setup.sh
+++ b/plugins/spark/v3.5/regtests/setup.sh
@@ -39,6 +39,7 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 SPARK_VERSION=3.5.5
 SCALA_VERSION=2.12
 POLARIS_CLIENT_JAR=""
+POLARIS_VERSION=""
 while [[ $# -gt 0 ]]; do
   case "$1" in
     --sparkVersion)
@@ -51,6 +52,11 @@ while [[ $# -gt 0 ]]; do
       shift # past argument
       shift # past value
       ;;
+    --polarisVersion)
+      POLARIS_VERSION="$2"
+      shift # past argument
+      shift # past value
+      ;;
     --jar)
       POLARIS_CLIENT_JAR="$2"
       shift # past argument
@@ -62,7 +68,7 @@ while [[ $# -gt 0 ]]; do
   esac
 done
 
-echo "SET UP FOR SPARK_VERSION=${SPARK_VERSION} SCALA_VERSION=${SCALA_VERSION} 
POLARIS_CLIENT_JAR=${POLARIS_CLIENT_JAR}"
+echo "SET UP FOR SPARK_VERSION=${SPARK_VERSION} SCALA_VERSION=${SCALA_VERSION} 
POLARIS_VERSION=${POLARIS_VERSION} POLARIS_CLIENT_JAR=${POLARIS_CLIENT_JAR}"
 
 if [ "$SCALA_VERSION" == "2.12" ]; then
   SPARK_DISTRIBUTION=spark-${SPARK_VERSION}-bin-hadoop3
@@ -129,11 +135,24 @@ else
   echo 'Setting spark conf...'
  # Instead of clobbering existing spark conf, just comment it all out in case it was customized carefully.
   sed -i 's/^/# /' ${SPARK_CONF}
-cat << EOF >> ${SPARK_CONF}
 
+# If POLARIS_CLIENT_JAR is provided, set the spark conf to use the jars configuration.
+# Otherwise, use the packages setting.
+if [[ -z "$POLARIS_CLIENT_JAR" ]]; then
+  cat << EOF >> ${SPARK_CONF}
+# POLARIS Spark client test conf
+spark.jars.packages org.apache.polaris:polaris-spark-3.5_$SCALA_VERSION:$POLARIS_VERSION,io.delta:delta-spark_${SCALA_VERSION}:3.2.1
+EOF
+else
+  cat << EOF >> ${SPARK_CONF}
 # POLARIS Spark client test conf
 spark.jars $POLARIS_CLIENT_JAR
 spark.jars.packages io.delta:delta-spark_${SCALA_VERSION}:3.2.1
+EOF
+fi
+
+cat << EOF >> ${SPARK_CONF}
+
 spark.sql.variable.substitute true
 
 spark.driver.extraJavaOptions -Dderby.system.home=${DERBY_HOME}
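
Net effect on the generated `spark-defaults.conf`: a PACKAGE-mode run ends up with configuration along these lines (a sketch; the Scala suffix, version, and Derby path are placeholders):

```shell
# POLARIS Spark client test conf
spark.jars.packages org.apache.polaris:polaris-spark-3.5_2.12:<polaris-version>,io.delta:delta-spark_2.12:3.2.1

spark.sql.variable.substitute true

spark.driver.extraJavaOptions -Dderby.system.home=<derby-home>
```

A JAR-mode run keeps the previous shape, with `spark.jars` pointing at the bundle jar and only the Delta package in `spark.jars.packages`.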
