This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 8769b9e5a feat: Add bash script to build and run fuzz testing (#2686)
8769b9e5a is described below

commit 8769b9e5a443d31815c9017a08f283a3c856f44f
Author: Manu Zhang <[email protected]>
AuthorDate: Fri Nov 14 03:46:25 2025 +0800

    feat: Add bash script to build and run fuzz testing (#2686)
---
 fuzz-testing/run.sh | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/fuzz-testing/run.sh b/fuzz-testing/run.sh
new file mode 100755
index 000000000..27391249c
--- /dev/null
+++ b/fuzz-testing/run.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+# Usage: ./run.sh
+# Builds necessary JARs, generates data and queries, and runs fuzz tests for 
Comet Spark.
+# Environment variables:
+#   SPARK_HOME - path to Spark installation
+#   SPARK_MASTER - Spark master URL (default: local[*])
+#   SCALA_MAJOR_VERSION - Scala major version to use (default: 2.12)
+#   SPARK_MAJOR_VERSION - Spark major version to use (default: 3.5)
+#   NUM_FILES - number of data files to generate (default: 2)
+#   NUM_ROWS - number of rows per file (default: 200)
+#   NUM_QUERIES - number of queries to generate (default: 500)
+
+set -eux
+
+DIR="$(cd "$(dirname "$0")" && pwd)"
+PARENT_DIR="${DIR}/.."
+MVN_CMD="${PARENT_DIR}/mvnw"
+SPARK_MASTER="${SPARK_MASTER:-local[*]}"
+SCALA_MAJOR_VERSION="${SCALA_MAJOR_VERSION:-2.12}"
+SPARK_MAJOR_VERSION="${SPARK_MAJOR_VERSION:-3.5}"
+PROFILES="-Pscala-${SCALA_MAJOR_VERSION},spark-${SPARK_MAJOR_VERSION}"
+PROJECT_VERSION=$("${MVN_CMD}" -f "${DIR}/pom.xml" -q help:evaluate 
-Dexpression=project.version -DforceStdout)
+COMET_SPARK_JAR="${PARENT_DIR}/spark/target/comet-spark${SPARK_MAJOR_VERSION}_${SCALA_MAJOR_VERSION}-${PROJECT_VERSION}.jar"
+COMET_FUZZ_JAR="${DIR}/target/comet-fuzz-spark${SPARK_MAJOR_VERSION}_${SCALA_MAJOR_VERSION}-${PROJECT_VERSION}-jar-with-dependencies.jar"
+NUM_FILES="${NUM_FILES:-2}"
+NUM_ROWS="${NUM_ROWS:-200}"
+NUM_QUERIES="${NUM_QUERIES:-500}"
+
+if [ ! -f "${COMET_SPARK_JAR}" ]; then
+  echo "Building Comet Spark jar..."
+  pushd "${PARENT_DIR}"
+  PROFILES="${PROFILES}" make
+  popd
+else
+  echo "Building Fuzz testing jar..."
+  "${MVN_CMD}" -f "${DIR}/pom.xml" package -DskipTests "${PROFILES}"
+fi
+
+echo "Generating data..."
+"${SPARK_HOME}/bin/spark-submit" \
+  --master "${SPARK_MASTER}" \
+  --class org.apache.comet.fuzz.Main \
+  "${COMET_FUZZ_JAR}" \
+  data --num-files="${NUM_FILES}" --num-rows="${NUM_ROWS}" \
+  --exclude-negative-zero \
+  --generate-arrays --generate-structs --generate-maps
+
+echo "Generating queries..."
+"${SPARK_HOME}/bin/spark-submit" \
+  --master "${SPARK_MASTER}" \
+  --class org.apache.comet.fuzz.Main \
+  "${COMET_FUZZ_JAR}" \
+  queries --num-files="${NUM_FILES}" --num-queries="${NUM_QUERIES}"
+
+echo "Running fuzz tests..."
+"${SPARK_HOME}/bin/spark-submit" \
+  --master "${SPARK_MASTER}" \
+  --conf spark.memory.offHeap.enabled=true \
+  --conf spark.memory.offHeap.size=16G \
+  --conf spark.plugins=org.apache.spark.CometPlugin \
+  --conf spark.comet.enabled=true \
+  --conf 
spark.shuffle.manager=org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager
 \
+  --conf spark.comet.exec.shuffle.enabled=true \
+  --jars "${COMET_SPARK_JAR}" \
+  --conf spark.driver.extraClassPath="${COMET_SPARK_JAR}" \
+  --conf spark.executor.extraClassPath="${COMET_SPARK_JAR}" \
+  --class org.apache.comet.fuzz.Main \
+  "${COMET_FUZZ_JAR}" \
+  run --num-files="${NUM_FILES}" --filename="queries.sql"


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to