This is an automated email from the ASF dual-hosted git repository.
viirya pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new e48b12ea build: Add Spark SQL test pipeline with ANSI mode enabled (#321)
e48b12ea is described below
commit e48b12ead988bdb842728881181d49eca399982a
Author: Parth Chandra <[email protected]>
AuthorDate: Fri May 3 09:36:11 2024 -0700
build: Add Spark SQL test pipeline with ANSI mode enabled (#321)
* build: Add Spark SQL test pipeline with ANSI mode enabled
* add ENABLE_COMET_ANSI_MODE to actual run of tests
* fix diff and rat exclusion
* fix diff
* Make workflow manual run only
* fix diff
---
.github/workflows/spark_sql_test_ansi.yml | 81 +++++++++++++++++++++++++++++++
dev/diffs/3.4.2.diff | 40 ++++++++++++---
pom.xml | 2 +-
3 files changed, 114 insertions(+), 9 deletions(-)
diff --git a/.github/workflows/spark_sql_test_ansi.yml
b/.github/workflows/spark_sql_test_ansi.yml
new file mode 100644
index 00000000..5c5d2858
--- /dev/null
+++ b/.github/workflows/spark_sql_test_ansi.yml
@@ -0,0 +1,81 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+name: Spark SQL Tests (ANSI mode)
+
+concurrency:
+ group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{
github.workflow }}
+ cancel-in-progress: true
+
+on:
+ # enable the following once Ansi support is completed
+ # push:
+ # paths-ignore:
+ # - "doc/**"
+ # - "**.md"
+ # pull_request:
+ # paths-ignore:
+ # - "doc/**"
+ # - "**.md"
+
+ # manual trigger ONLY
+ #
https://docs.github.com/en/actions/managing-workflow-runs/manually-running-a-workflow
+ workflow_dispatch:
+
+env:
+ RUST_VERSION: nightly
+
+jobs:
+ spark-sql-catalyst:
+ strategy:
+ matrix:
+ os: [ubuntu-latest]
+ java-version: [11]
+ spark-version: [{short: '3.4', full: '3.4.2'}]
+ module:
+ - {name: "catalyst", args1: "catalyst/test", args2: ""}
+ - {name: "sql/core-1", args1: "", args2: sql/testOnly * -- -l
org.apache.spark.tags.ExtendedSQLTest -l org.apache.spark.tags.SlowSQLTest}
+ - {name: "sql/core-2", args1: "", args2: "sql/testOnly * -- -n
org.apache.spark.tags.ExtendedSQLTest"}
+ - {name: "sql/core-3", args1: "", args2: "sql/testOnly * -- -n
org.apache.spark.tags.SlowSQLTest"}
+ - {name: "sql/hive-1", args1: "", args2: "hive/testOnly * -- -l
org.apache.spark.tags.ExtendedHiveTest -l org.apache.spark.tags.SlowHiveTest"}
+ - {name: "sql/hive-2", args1: "", args2: "hive/testOnly * -- -n
org.apache.spark.tags.ExtendedHiveTest"}
+ - {name: "sql/hive-3", args1: "", args2: "hive/testOnly * -- -n
org.apache.spark.tags.SlowHiveTest"}
+ fail-fast: false
+ name: spark-sql-${{ matrix.module.name }}/${{ matrix.os }}/spark-${{
matrix.spark-version.full }}/java-${{ matrix.java-version }}
+ runs-on: ${{ matrix.os }}
+ container:
+ image: amd64/rust
+ steps:
+ - uses: actions/checkout@v4
+ - name: Setup Rust & Java toolchain
+ uses: ./.github/actions/setup-builder
+ with:
+ rust-version: ${{env.RUST_VERSION}}
+ jdk-version: ${{ matrix.java-version }}
+ - name: Setup Spark
+ uses: ./.github/actions/setup-spark-builder
+ with:
+ spark-version: ${{ matrix.spark-version.full }}
+ spark-short-version: ${{ matrix.spark-version.short }}
+ comet-version: '0.1.0-SNAPSHOT' # TODO: get this from pom.xml
+ - name: Run Spark tests
+ run: |
+ cd apache-spark
+ ENABLE_COMET=true ENABLE_COMET_ANSI_MODE=true build/sbt ${{
matrix.module.args1 }} "${{ matrix.module.args2 }}"
+ env:
+ LC_ALL: "C.UTF-8"
+
diff --git a/dev/diffs/3.4.2.diff b/dev/diffs/3.4.2.diff
index 7c7323d3..4154a705 100644
--- a/dev/diffs/3.4.2.diff
+++ b/dev/diffs/3.4.2.diff
@@ -1327,7 +1327,7 @@ index abe606ad9c1..2d930b64cca 100644
val tblTargetName = "tbl_target"
val tblSourceQualified = s"default.$tblSourceName"
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
-index dd55fcfe42c..cc18147d17a 100644
+index dd55fcfe42c..b4776c50e49 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/test/SQLTestUtils.scala
@@ -41,6 +41,7 @@ import org.apache.spark.sql.catalyst.plans.PlanTest
@@ -1351,7 +1351,7 @@ index dd55fcfe42c..cc18147d17a 100644
}
}
-@@ -242,6 +247,23 @@ private[sql] trait SQLTestUtilsBase
+@@ -242,6 +247,32 @@ private[sql] trait SQLTestUtilsBase
protected override def _sqlContext: SQLContext = self.spark.sqlContext
}
@@ -1371,11 +1371,20 @@ index dd55fcfe42c..cc18147d17a 100644
+ val v = System.getenv("ENABLE_COMET_SCAN_ONLY")
+ v != null && v.toBoolean
+ }
++
++ /**
++ * Whether to enable ansi mode This is only effective when
++ * [[isCometEnabled]] returns true.
++ */
++ protected def enableCometAnsiMode: Boolean = {
++ val v = System.getenv("ENABLE_COMET_ANSI_MODE")
++ v != null && v.toBoolean
++ }
+
protected override def withSQLConf(pairs: (String, String)*)(f: => Unit):
Unit = {
SparkSession.setActiveSession(spark)
super.withSQLConf(pairs: _*)(f)
-@@ -434,6 +456,8 @@ private[sql] trait SQLTestUtilsBase
+@@ -434,6 +465,8 @@ private[sql] trait SQLTestUtilsBase
val schema = df.schema
val withoutFilters = df.queryExecution.executedPlan.transform {
case FilterExec(_, child) => child
@@ -1385,10 +1394,10 @@ index dd55fcfe42c..cc18147d17a 100644
spark.internalCreateDataFrame(withoutFilters.execute(), schema)
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
-index ed2e309fa07..4cfe0093da7 100644
+index ed2e309fa07..f64cc283903 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/test/SharedSparkSession.scala
-@@ -74,6 +74,21 @@ trait SharedSparkSessionBase
+@@ -74,6 +74,28 @@ trait SharedSparkSessionBase
// this rule may potentially block testing of other optimization rules
such as
// ConstantPropagation etc.
.set(SQLConf.OPTIMIZER_EXCLUDED_RULES.key,
ConvertToLocalRelation.ruleName)
@@ -1406,6 +1415,13 @@ index ed2e309fa07..4cfe0093da7 100644
+
"org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
+ .set("spark.comet.exec.shuffle.enabled", "true")
+ }
++
++ if (enableCometAnsiMode) {
++ conf
++ .set("spark.sql.ansi.enabled", "true")
++ .set("spark.comet.ansi.enabled", "true")
++ }
++
+ }
conf.set(
StaticSQLConf.WAREHOUSE_PATH,
@@ -1447,10 +1463,10 @@ index 1966e1e64fd..cde97a0aafe 100644
spark.sql(
"""
diff --git
a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
-index 07361cfdce9..c5d94c92e32 100644
+index 07361cfdce9..1763168a808 100644
--- a/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
+++ b/sql/hive/src/test/scala/org/apache/spark/sql/hive/test/TestHive.scala
-@@ -55,25 +55,46 @@ object TestHive
+@@ -55,25 +55,54 @@ object TestHive
new SparkContext(
System.getProperty("spark.sql.test.master", "local[1]"),
"TestSQLContext",
@@ -1507,8 +1523,16 @@ index 07361cfdce9..c5d94c92e32 100644
+
"org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
+ .set("spark.comet.exec.shuffle.enabled", "true")
+ }
-+ }
++
++ val a = System.getenv("ENABLE_COMET_ANSI_MODE")
++ if (a != null && a.toBoolean) {
++ conf
++ .set("spark.sql.ansi.enabled", "true")
++ .set("spark.comet.ansi.enabled", "true")
++ }
++ }
++
+ conf
+ }
+ ))
diff --git a/pom.xml b/pom.xml
index 6d28c816..d47953fa 100644
--- a/pom.xml
+++ b/pom.xml
@@ -886,7 +886,7 @@ under the License.
<exclude>rust-toolchain</exclude>
<exclude>Makefile</exclude>
<exclude>dev/Dockerfile*</exclude>
- <exclude>dev/diff/**</exclude>
+ <exclude>dev/diffs/**</exclude>
<exclude>dev/deploy-file</exclude>
<exclude>**/test/resources/**</exclude>
<exclude>**/benchmarks/*.txt</exclude>
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]