This is an automated email from the ASF dual-hosted git repository.
philo pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git
The following commit(s) were added to refs/heads/main by this push:
new 813bc5ea22 [GLUTEN-9328][VL] Update GlutenSQLQueryTestSuite error
handling for missing resource files and update docs (#9969)
813bc5ea22 is described below
commit 813bc5ea226dcea57bdac32b72711a44cf952e99
Author: Dina Suehiro Jones <[email protected]>
AuthorDate: Wed Jun 18 23:29:20 2025 -0700
[GLUTEN-9328][VL] Update GlutenSQLQueryTestSuite error handling for missing
resource files and update docs (#9969)
---
.github/workflows/util/install_spark_resources.sh | 10 ++++++++--
docs/developers/HowTo.md | 13 +++++++++++++
docs/velox-backend-support-progress.md | 19 ++++++++++++++++---
.../apache/spark/sql/GlutenSQLQueryTestSuite.scala | 16 ++++++++++------
.../apache/spark/sql/GlutenSQLQueryTestSuite.scala | 16 ++++++++++------
.../apache/spark/sql/GlutenSQLQueryTestSuite.scala | 16 ++++++++++------
.../apache/spark/sql/GlutenSQLQueryTestSuite.scala | 19 ++++++++++++-------
7 files changed, 79 insertions(+), 30 deletions(-)
diff --git a/.github/workflows/util/install_spark_resources.sh
b/.github/workflows/util/install_spark_resources.sh
index 1f873dad64..c3780d03d7 100755
--- a/.github/workflows/util/install_spark_resources.sh
+++ b/.github/workflows/util/install_spark_resources.sh
@@ -68,9 +68,13 @@ function install_spark() {
echo "Skipping checksum because shasum is not installed." 1>&2
fi
- tar --strip-components=1 -xf "${local_binary}"
spark-"${spark_version}"-bin-hadoop"${hadoop_version}""${scala_suffix_short}"/jars/
+ tar --strip-components=1 -xf "${local_binary}"
spark-"${spark_version}"-bin-hadoop"${hadoop_version}""${scala_suffix_short}"/jars/
\
+
spark-"${spark_version}"-bin-hadoop"${hadoop_version}""${scala_suffix_short}"/python
\
+
spark-"${spark_version}"-bin-hadoop"${hadoop_version}""${scala_suffix_short}"/bin
mkdir -p
${INSTALL_DIR}/shims/spark"${spark_version_short}""${scala_suffix}"/spark_home/assembly/target/scala-"${scala_version}"
mv jars
${INSTALL_DIR}/shims/spark"${spark_version_short}""${scala_suffix}"/spark_home/assembly/target/scala-"${scala_version}"
+ mv python
${INSTALL_DIR}/shims/spark"${spark_version_short}""${scala_suffix}"/spark_home
+ mv bin
${INSTALL_DIR}/shims/spark"${spark_version_short}""${scala_suffix}"/spark_home
tar --strip-components=1 -xf "${local_source}"
spark-"${spark_version}"/sql/core/src/test/resources/
mkdir -p shims/spark"${spark_version_short}${scala_suffix}"/spark_home/
@@ -80,7 +84,9 @@ function install_spark() {
rm -rf "${local_source}"
}
-INSTALL_DIR=/opt/
+INSTALL_DIR=${2:-/opt/}
+mkdir -p ${INSTALL_DIR}
+
case "$1" in
3.2)
# Spark-3.2
diff --git a/docs/developers/HowTo.md b/docs/developers/HowTo.md
index c3b6e91203..5386ae7321 100644
--- a/docs/developers/HowTo.md
+++ b/docs/developers/HowTo.md
@@ -126,6 +126,19 @@ mvn test -Pspark-3.5 -Pspark-ut -Pbackends-velox
-DargLine="-Dspark.test.home=/p
```
Please set `wildcardSuites` with a fully qualified class name.
`spark.test.home` is optional to set. It is only required for some test suites
to use Spark resources.
+If you are specifying the `spark.test.home` arg, it should be set to either:
+* The path to a directory containing Spark source code, which has already been
built
+* Or use the `install_spark_resources.sh` script to get a directory with the
necessary resource files:
+ ```
+ # Define a directory to use for the Spark files and the Spark version
+ export spark_dir=/tmp/spark
+ export spark_version=3.5
+
+ # Run the install_spark_resources.sh script
+ .github/workflows/util/install_spark_resources.sh ${spark_version}
${spark_dir}
+ ```
+ After running the `install_spark_resources.sh`, define the `spark.test.home`
directory like:
+ `-DargLine="-Dspark.test.home=${spark_dir}/shims/spark35/spark_home"` when
running unit tests.
For most cases, please make sure Gluten native build is done before running a
Scala/Java test.
diff --git a/docs/velox-backend-support-progress.md
b/docs/velox-backend-support-progress.md
index c1ba44421f..b454f613eb 100644
--- a/docs/velox-backend-support-progress.md
+++ b/docs/velox-backend-support-progress.md
@@ -92,10 +92,23 @@ Gluten supports 30+ operators (Drag to right to see all
data types)
Spark categorizes built-in functions into four types: Scalar Functions,
Aggregate Functions,
Window Functions, and Generator Functions.
In Gluten, function support is automatically generated by a script and
maintained in separate files.
-Run the following command to generate and update the support status. Note that
`--spark_home` should
-be set to the directory containing the Spark source code for the latest
supported Spark version in
-Gluten, and the Spark project must be built from source.
+When running the script, the `--spark_home` arg should be set to either:
+* The directory containing the Spark source code for the latest supported
Spark version in Gluten, and the Spark
+ project must be built from source.
+* Or use the `install_spark_resources.sh` script to get a directory with the
necessary resource files:
+ ```
+ # Define a directory to use for the Spark files and the latest Spark version
+ export spark_dir=/tmp/spark
+ export spark_version=3.5
+
+ # Run the install_spark_resources.sh script
+ .github/workflows/util/install_spark_resources.sh ${spark_version}
${spark_dir}
+ ```
+ After running the `install_spark_resources.sh`, the `--spark_home` for the
document generation script will be
+ something like: `--spark_home=${spark_dir}/shims/spark35/spark_home`
+
+Use the following command to generate and update the support status:
```shell
python3 tools/scripts/gen-function-support-docs.py
--spark_home=/path/to/spark_source_code
```
diff --git
a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
index 2b4fce3a57..0e0c0a70d5 100644
---
a/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
+++
b/gluten-ut/spark32/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
@@ -33,7 +33,7 @@ import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.tags.ExtendedSQLTest
import org.apache.spark.util.Utils
-import java.io.File
+import java.io.{File, FileNotFoundException}
import java.net.URI
import java.util.Locale
@@ -615,11 +615,15 @@ class GlutenSQLQueryTestSuite
/** Returns all the files (not directories) in a directory, recursively. */
protected def listFilesRecursively(path: File): Seq[File] = {
- val (dirs, files) = path.listFiles().partition(_.isDirectory)
- // Filter out test files with invalid extensions such as temp files created
- // by vi (.swp), Mac (.DS_Store) etc.
- val filteredFiles = files.filter(_.getName.endsWith(validFileExtensions))
- filteredFiles ++ dirs.flatMap(listFilesRecursively)
+ if (path.exists) {
+ val (dirs, files) = path.listFiles().partition(_.isDirectory)
+ // Filter out test files with invalid extensions such as temp files
created
+ // by vi (.swp), Mac (.DS_Store) etc.
+ val filteredFiles = files.filter(_.getName.endsWith(validFileExtensions))
+ filteredFiles ++ dirs.flatMap(listFilesRecursively)
+ } else {
+ throw new FileNotFoundException(s"Directory does not exist:
${path.getAbsolutePath}")
+ }
}
/** Load built-in test tables into the SparkSession. */
diff --git
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
index 00bae3541d..c55b744dc1 100644
---
a/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
+++
b/gluten-ut/spark33/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
@@ -33,7 +33,7 @@ import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.tags.ExtendedSQLTest
import org.apache.spark.util.Utils
-import java.io.File
+import java.io.{File, FileNotFoundException}
import java.net.URI
import java.util.Locale
@@ -615,11 +615,15 @@ class GlutenSQLQueryTestSuite
/** Returns all the files (not directories) in a directory, recursively. */
protected def listFilesRecursively(path: File): Seq[File] = {
- val (dirs, files) = path.listFiles().partition(_.isDirectory)
- // Filter out test files with invalid extensions such as temp files created
- // by vi (.swp), Mac (.DS_Store) etc.
- val filteredFiles = files.filter(_.getName.endsWith(validFileExtensions))
- filteredFiles ++ dirs.flatMap(listFilesRecursively)
+ if (path.exists) {
+ val (dirs, files) = path.listFiles().partition(_.isDirectory)
+ // Filter out test files with invalid extensions such as temp files
created
+ // by vi (.swp), Mac (.DS_Store) etc.
+ val filteredFiles = files.filter(_.getName.endsWith(validFileExtensions))
+ filteredFiles ++ dirs.flatMap(listFilesRecursively)
+ } else {
+ throw new FileNotFoundException(s"Directory does not exist:
${path.getAbsolutePath}")
+ }
}
/** Load built-in test tables into the SparkSession. */
diff --git
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
index 30f2ed447e..fddfe83fb5 100644
---
a/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
+++
b/gluten-ut/spark34/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
@@ -35,7 +35,7 @@ import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.tags.ExtendedSQLTest
import org.apache.spark.util.Utils
-import java.io.File
+import java.io.{File, FileNotFoundException}
import java.net.URI
import java.util.Locale
@@ -638,11 +638,15 @@ class GlutenSQLQueryTestSuite
/** Returns all the files (not directories) in a directory, recursively. */
protected def listFilesRecursively(path: File): Seq[File] = {
- val (dirs, files) = path.listFiles().partition(_.isDirectory)
- // Filter out test files with invalid extensions such as temp files created
- // by vi (.swp), Mac (.DS_Store) etc.
- val filteredFiles = files.filter(_.getName.endsWith(validFileExtensions))
- filteredFiles ++ dirs.flatMap(listFilesRecursively)
+ if (path.exists) {
+ val (dirs, files) = path.listFiles().partition(_.isDirectory)
+ // Filter out test files with invalid extensions such as temp files
created
+ // by vi (.swp), Mac (.DS_Store) etc.
+ val filteredFiles = files.filter(_.getName.endsWith(validFileExtensions))
+ filteredFiles ++ dirs.flatMap(listFilesRecursively)
+ } else {
+ throw new FileNotFoundException(s"Directory does not exist:
${path.getAbsolutePath}")
+ }
}
/** Load built-in test tables into the SparkSession. */
diff --git
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
index 45a76e4d81..6056ea2ac9 100644
---
a/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
+++
b/gluten-ut/spark35/src/test/scala/org/apache/spark/sql/GlutenSQLQueryTestSuite.scala
@@ -38,7 +38,7 @@ import org.apache.spark.sql.test.SharedSparkSession
import org.apache.spark.tags.ExtendedSQLTest
import org.apache.spark.util.Utils
-import java.io.File
+import java.io.{File, FileNotFoundException}
import java.net.URI
import java.util.Locale
@@ -780,17 +780,22 @@ class GlutenSQLQueryTestSuite
}
// ==== Start of modifications for Gluten. ====
- // ===- End of modifications for Gluten. ====
/** Returns all the files (not directories) in a directory, recursively. */
protected def listFilesRecursively(path: File): Seq[File] = {
- val (dirs, files) = path.listFiles().partition(_.isDirectory)
- // Filter out test files with invalid extensions such as temp files created
- // by vi (.swp), Mac (.DS_Store) etc.
- val filteredFiles = files.filter(_.getName.endsWith(validFileExtensions))
- filteredFiles ++ dirs.flatMap(listFilesRecursively)
+ if (path.exists) {
+ val (dirs, files) = path.listFiles().partition(_.isDirectory)
+ // Filter out test files with invalid extensions such as temp files
created
+ // by vi (.swp), Mac (.DS_Store) etc.
+ val filteredFiles = files.filter(_.getName.endsWith(validFileExtensions))
+ filteredFiles ++ dirs.flatMap(listFilesRecursively)
+ } else {
+ throw new FileNotFoundException(s"Directory does not exist:
${path.getAbsolutePath}")
+ }
}
+ // === End of modifications for Gluten. ====
+
/** Load built-in test tables into the SparkSession. */
private def createTestTables(session: SparkSession): Unit = {
import session.implicits._
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]