This is an automated email from the ASF dual-hosted git repository.
yzou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/polaris.git
The following commit(s) were added to refs/heads/main by this push:
new c5907e668 Add regression test coverage for Spark Client with package conf (#1997)
c5907e668 is described below
commit c5907e66890211811bf28b9c6f2b81c14847d3a0
Author: Yun Zou <[email protected]>
AuthorDate: Thu Jul 3 17:01:37 2025 -0700
Add regression test coverage for Spark Client with package conf (#1997)
---
.github/workflows/spark_client_regtests.yml | 2 +-
plugins/spark/v3.5/regtests/README.md | 4 +-
plugins/spark/v3.5/regtests/docker-compose.yml | 1 +
plugins/spark/v3.5/regtests/run.sh | 89 ++++++++++++++++----------
plugins/spark/v3.5/regtests/setup.sh | 23 ++++++-
5 files changed, 81 insertions(+), 38 deletions(-)
diff --git a/.github/workflows/spark_client_regtests.yml b/.github/workflows/spark_client_regtests.yml
index b4da3331e..c74e3f763 100644
--- a/.github/workflows/spark_client_regtests.yml
+++ b/.github/workflows/spark_client_regtests.yml
@@ -47,7 +47,7 @@ jobs:
run: ./gradlew regeneratePythonClient
- name: Project build without testing
- run: ./gradlew assemble
+ run: ./gradlew assemble publishToMavenLocal
- name: Image build
run: |
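
The `publishToMavenLocal` step installs the freshly built Spark client artifacts into the runner's local Maven repository, which the package-based test mode below resolves against. As a quick sanity check, a sketch (the version directory shown is hypothetical, depending on the build):

    # After `./gradlew assemble publishToMavenLocal`, the client should appear
    # under the local Maven repository:
    ls ~/.m2/repository/org/apache/polaris/polaris-spark-3.5_2.12/
    # e.g. 1.1.0-incubating-SNAPSHOT/   (hypothetical version)
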
diff --git a/plugins/spark/v3.5/regtests/README.md b/plugins/spark/v3.5/regtests/README.md
index 39220352c..06a0ccd13 100755
--- a/plugins/spark/v3.5/regtests/README.md
+++ b/plugins/spark/v3.5/regtests/README.md
@@ -44,7 +44,7 @@ Tests can be run with docker-compose using the provided `./plugins/spark/v3.5/re
follows:
```shell
-./gradlew build
+./gradlew assemble publishToMavenLocal
./gradlew \
:polaris-server:assemble \
:polaris-server:quarkusAppPartsBuild --rerun \
@@ -76,7 +76,7 @@ Regression tests can be run locally as well, using the test harness. For local t
Scala 2.12 and Scala 2.13 are supported.
To run regression tests locally, run the following:
-- `./gradlew build` -- build the Polaris project and Spark Client jars.
+- `./gradlew assemble publishToMavenLocal` -- build the Polaris project and Spark Client jars, and publish them to the local Maven repo.
- `./gradlew run` -- start a Polaris server on localhost:8181.
- `env POLARIS_HOST=localhost ./plugins/spark/v3.5/regtests/run.sh` -- run regtests.
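
Taken together, the updated README flow for a local run reads as follows (a sketch of the three steps; the server runs in a separate shell):

    ./gradlew assemble publishToMavenLocal    # build and publish the Spark client jars to ~/.m2
    ./gradlew run                             # start a Polaris server on localhost:8181
    env POLARIS_HOST=localhost ./plugins/spark/v3.5/regtests/run.sh    # run regtests
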
diff --git a/plugins/spark/v3.5/regtests/docker-compose.yml b/plugins/spark/v3.5/regtests/docker-compose.yml
index 37fe2afe8..90019a98b 100755
--- a/plugins/spark/v3.5/regtests/docker-compose.yml
+++ b/plugins/spark/v3.5/regtests/docker-compose.yml
@@ -46,4 +46,5 @@ services:
polaris:
condition: service_healthy
volumes:
+ - ~/.m2:/home/spark/.m2
- ./output:/tmp/polaris-regtests/
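
Mounting `~/.m2` into the container is what makes the new package-based mode work: Spark's `spark.jars.packages` resolution consults the local Maven repository (mounted here at `/home/spark/.m2`), so the locally published Polaris client resolves without a remote download. A minimal illustration, with a hypothetical version string:

    # Resolves the client from the mounted local Maven repo rather than Maven Central
    # (1.1.0-incubating-SNAPSHOT is a hypothetical version):
    spark-shell --packages org.apache.polaris:polaris-spark-3.5_2.12:1.1.0-incubating-SNAPSHOT
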
diff --git a/plugins/spark/v3.5/regtests/run.sh b/plugins/spark/v3.5/regtests/run.sh
index 6c19cdcc8..90c2dd755 100755
--- a/plugins/spark/v3.5/regtests/run.sh
+++ b/plugins/spark/v3.5/regtests/run.sh
@@ -68,6 +68,8 @@ fi
SPARK_MAJOR_VERSION="3.5"
SPARK_VERSION="3.5.5"
+SPARK_SHELL_OPTIONS=("PACKAGE" "JAR")
+
for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do
echo "RUN REGRESSION TEST FOR SPARK_MAJOR_VERSION=${SPARK_MAJOR_VERSION},
SPARK_VERSION=${SPARK_VERSION}, SCALA_VERSION=${SCALA_VERSION}"
# find the project jar
@@ -75,44 +77,65 @@ for SCALA_VERSION in "${SCALA_VERSIONS[@]}"; do
JAR_PATH=$(find ${SPARK_DIR} -name "polaris-spark-${SPARK_MAJOR_VERSION}_${SCALA_VERSION}-*.*-bundle.jar" -print -quit)
echo "find jar ${JAR_PATH}"
+ # extract the Polaris version from the bundle jar name
+ JAR_NAME=$(basename "$JAR_PATH")
+ echo "JAR_NAME=${JAR_NAME}"
+ POLARIS_VERSION=$(echo "$JAR_NAME" | sed -n 's/.*-\([0-9][^-]*.*\)-bundle\.jar/\1/p')
+ echo "$POLARIS_VERSION"
+
SPARK_EXISTS="TRUE"
if [ -z "${SPARK_HOME}" ]; then
SPARK_EXISTS="FALSE"
fi
- source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --jar ${JAR_PATH}
-
- # run the spark_sql test
- loginfo "Starting test spark_sql.sh"
-
- TEST_FILE="spark_sql.sh"
- TEST_SHORTNAME="spark_sql"
-
- TEST_TMPDIR="/tmp/polaris-spark-regtests/${TEST_SHORTNAME}_${SPARK_MAJOR_VERSION}_${SCALA_VERSION}"
- TEST_STDERR="${TEST_TMPDIR}/${TEST_SHORTNAME}.stderr"
- TEST_STDOUT="${TEST_TMPDIR}/${TEST_SHORTNAME}.stdout"
-
- mkdir -p ${TEST_TMPDIR}
- if (( ${VERBOSE} )); then
- ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' | tee ${TEST_STDOUT}
- else
- ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' > ${TEST_STDOUT}
- fi
- loginfo "Test run concluded for ${TEST_SUITE}:${TEST_SHORTNAME}"
-
- TEST_REF="$(realpath ${SCRIPT_DIR})/${TEST_SHORTNAME}.ref"
- if cmp --silent ${TEST_STDOUT} ${TEST_REF}; then
- loggreen "Test SUCCEEDED: ${TEST_SUITE}:${TEST_SHORTNAME}"
- else
- logred "Test FAILED: ${TEST_SUITE}:${TEST_SHORTNAME}"
- echo '#!/bin/bash' > ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
- echo "meld ${TEST_STDOUT} ${TEST_REF}" >>
${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
- chmod 750 ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
- logred "To compare and fix diffs (if 'meld' installed):
${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh"
- logred "Or manually diff: diff ${TEST_STDOUT} ${TEST_REF}"
- logred "See stderr from test run for additional diagnostics:
${TEST_STDERR}"
- diff ${TEST_STDOUT} ${TEST_REF}
- NUM_FAILURES=$(( NUM_FAILURES + 1 ))
- fi
+ for SPARK_SHELL_OPTION in "${SPARK_SHELL_OPTIONS[@]}"; do
+ # clean up the default configuration if it exists
+ if [ -d "${SPARK_HOME}" ]; then
+ SPARK_CONF="${SPARK_HOME}/conf/spark-defaults.conf"
+ if [ -f ${SPARK_CONF} ]; then
+ rm ${SPARK_CONF}
+ fi
+ fi
+
+ if [ "${SPARK_SHELL_OPTION}" == "PACKAGE" ]; then
+ # run the setup without jar configuration
+ source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION}
+ else
+ source ${SCRIPT_DIR}/setup.sh --sparkVersion ${SPARK_VERSION} --scalaVersion ${SCALA_VERSION} --polarisVersion ${POLARIS_VERSION} --jar ${JAR_PATH}
+ fi
+
+ # run the spark_sql test
+ loginfo "Starting test spark_sql.sh"
+
+ TEST_FILE="spark_sql.sh"
+ TEST_SHORTNAME="spark_sql"
+
+ TEST_TMPDIR="/tmp/polaris-spark-regtests/${TEST_SHORTNAME}_${SPARK_MAJOR_VERSION}_${SCALA_VERSION}"
+ TEST_STDERR="${TEST_TMPDIR}/${TEST_SHORTNAME}.stderr"
+ TEST_STDOUT="${TEST_TMPDIR}/${TEST_SHORTNAME}.stdout"
+
+ mkdir -p ${TEST_TMPDIR}
+ if (( ${VERBOSE} )); then
+ ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' | tee ${TEST_STDOUT}
+ else
+ ${SCRIPT_DIR}/${TEST_FILE} 2>${TEST_STDERR} | grep -v 'loading settings' > ${TEST_STDOUT}
+ fi
+ loginfo "Test run concluded for ${TEST_SUITE}:${TEST_SHORTNAME}"
+
+ TEST_REF="$(realpath ${SCRIPT_DIR})/${TEST_SHORTNAME}.ref"
+ if cmp --silent ${TEST_STDOUT} ${TEST_REF}; then
+ loggreen "Test SUCCEEDED: ${TEST_SUITE}:${TEST_SHORTNAME}"
+ else
+ logred "Test FAILED: ${TEST_SUITE}:${TEST_SHORTNAME}"
+ echo '#!/bin/bash' > ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
+ echo "meld ${TEST_STDOUT} ${TEST_REF}" >>
${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
+ chmod 750 ${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh
+ logred "To compare and fix diffs (if 'meld' installed):
${TEST_TMPDIR}/${TEST_SHORTNAME}.fixdiffs.sh"
+ logred "Or manually diff: diff ${TEST_STDOUT} ${TEST_REF}"
+ logred "See stderr from test run for additional diagnostics:
${TEST_STDERR}"
+ diff ${TEST_STDOUT} ${TEST_REF}
+ NUM_FAILURES=$(( NUM_FAILURES + 1 ))
+ fi
+ done
# clean up
if [ "${SPARK_EXISTS}" = "FALSE" ]; then
diff --git a/plugins/spark/v3.5/regtests/setup.sh b/plugins/spark/v3.5/regtests/setup.sh
index 072b08f6d..a5f757b3c 100755
--- a/plugins/spark/v3.5/regtests/setup.sh
+++ b/plugins/spark/v3.5/regtests/setup.sh
@@ -39,6 +39,7 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
SPARK_VERSION=3.5.5
SCALA_VERSION=2.12
POLARIS_CLIENT_JAR=""
+POLARIS_VERSION=""
while [[ $# -gt 0 ]]; do
case "$1" in
--sparkVersion)
@@ -51,6 +52,11 @@ while [[ $# -gt 0 ]]; do
shift # past argument
shift # past value
;;
+ --polarisVersion)
+ POLARIS_VERSION="$2"
+ shift # past argument
+ shift # past value
+ ;;
--jar)
POLARIS_CLIENT_JAR="$2"
shift # past argument
@@ -62,7 +68,7 @@ while [[ $# -gt 0 ]]; do
esac
done
-echo "SET UP FOR SPARK_VERSION=${SPARK_VERSION} SCALA_VERSION=${SCALA_VERSION}
POLARIS_CLIENT_JAR=${POLARIS_CLIENT_JAR}"
+echo "SET UP FOR SPARK_VERSION=${SPARK_VERSION} SCALA_VERSION=${SCALA_VERSION}
POLARIS_VERSION=${POLARIS_VERSION} POLARIS_CLIENT_JAR=${POLARIS_CLIENT_JAR}"
if [ "$SCALA_VERSION" == "2.12" ]; then
SPARK_DISTRIBUTION=spark-${SPARK_VERSION}-bin-hadoop3
@@ -129,11 +135,24 @@ else
echo 'Setting spark conf...'
# Instead of clobbering existing spark conf, just comment it all out in case it was customized carefully.
sed -i 's/^/# /' ${SPARK_CONF}
-cat << EOF >> ${SPARK_CONF}
+# If POLARIS_CLIENT_JAR is provided, set the spark conf to use the jars configuration.
+# Otherwise use the packages setting
+if [[ -z "$POLARIS_CLIENT_JAR" ]]; then
+ cat << EOF >> ${SPARK_CONF}
+# POLARIS Spark client test conf
+spark.jars.packages org.apache.polaris:polaris-spark-3.5_$SCALA_VERSION:$POLARIS_VERSION,io.delta:delta-spark_${SCALA_VERSION}:3.2.1
+EOF
+else
+ cat << EOF >> ${SPARK_CONF}
# POLARIS Spark client test conf
spark.jars $POLARIS_CLIENT_JAR
spark.jars.packages io.delta:delta-spark_${SCALA_VERSION}:3.2.1
+EOF
+fi
+
+cat << EOF >> ${SPARK_CONF}
+
spark.sql.variable.substitute true
spark.driver.extraJavaOptions -Dderby.system.home=${DERBY_HOME}
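
For reference, the two spark-defaults.conf variants this setup.sh change produces differ only in how the Polaris client is supplied (a sketch; the path and version strings are hypothetical):

    # PACKAGE mode (setup.sh called without --jar): resolve the client from the local Maven repo
    spark.jars.packages org.apache.polaris:polaris-spark-3.5_2.12:1.1.0-incubating-SNAPSHOT,io.delta:delta-spark_2.12:3.2.1

    # JAR mode (setup.sh called with --jar): supply the bundle jar directly
    spark.jars /path/to/polaris-spark-3.5_2.12-1.1.0-incubating-SNAPSHOT-bundle.jar
    spark.jars.packages io.delta:delta-spark_2.12:3.2.1
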