This is an automated email from the ASF dual-hosted git repository.

marong pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new c4da5e8fa2 [GLUTEN-11088][VL] Enable some ignored tests in Spark-4.0 
(#11235)
c4da5e8fa2 is described below

commit c4da5e8fa2524abaf539712c18961cc464621ba9
Author: Jin Chengcheng <[email protected]>
AuthorDate: Tue Dec 2 04:06:42 2025 +0000

    [GLUTEN-11088][VL] Enable some ignored tests in Spark-4.0 (#11235)
---
 .../gluten/utils/velox/VeloxTestSettings.scala     | 42 +-----------------
 .../GlutenApproximatePercentileQuerySuite.scala    |  2 +-
 .../spark/sql/GlutenDataFramePivotSuite.scala      | 25 +----------
 ...GlutenUnwrapCastInComparisonEndToEndSuite.scala | 39 +----------------
 .../spark/sql/execution/GlutenExchangeSuite.scala  | 51 ++--------------------
 5 files changed, 8 insertions(+), 151 deletions(-)

diff --git 
a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 19bc85dbcf..16d21206bd 100644
--- 
a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -624,11 +624,8 @@ class VeloxTestSettings extends BackendTestSettings {
     // Rewrite for Gluten. Change details are in the inline comments in 
individual tests.
     .excludeByPrefix("determining the number of reducers")
   enableSuite[GlutenExchangeSuite]
-    // ColumnarShuffleExchangeExec does not support doExecute() method
-    .exclude("shuffling UnsafeRows in exchange")
-    // This test will re-run in GlutenExchangeSuite with shuffle partitions > 1
-    .exclude("Exchange reuse across the whole plan")
   enableSuite[GlutenReplaceHashWithSortAggSuite]
+    // Rewritten to check the plan; some tests add an ORDER BY to guarantee result sort order
     .exclude("replace partial hash aggregate with sort aggregate")
     .exclude("replace partial and final hash aggregate together with sort 
aggregate")
     .exclude("do not replace hash aggregate if child does not have sort order")
@@ -670,11 +667,6 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("disable bucketing when the output doesn't contain all bucketing 
columns")
     .excludeByPrefix("bucket coalescing is applied when join expressions 
match")
   enableSuite[GlutenBucketedWriteWithoutHiveSupportSuite]
-    .exclude("write bucketed data")
-    .exclude("write bucketed data with sortBy")
-    .exclude("write bucketed data without partitionBy")
-    .exclude("write bucketed data without partitionBy with sortBy")
-    .exclude("write bucketed data with bucketing disabled")
   enableSuite[GlutenCreateTableAsSelectSuite]
     // TODO Gluten can not catch the spark exception in Driver side.
     .exclude("CREATE TABLE USING AS SELECT based on the file without write 
permission")
@@ -707,8 +699,6 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenTableScanSuite]
   enableSuite[GlutenApproxCountDistinctForIntervalsQuerySuite]
   enableSuite[GlutenApproximatePercentileQuerySuite]
-    // requires resource files from Vanilla spark jar
-    .exclude("SPARK-32908: maximum target error in percentile_approx")
   enableSuite[GlutenCachedTableSuite]
     .exclude("A cached table preserves the partitioning and ordering of its 
cached SparkPlan")
     .exclude("InMemoryRelation statistics")
@@ -724,8 +714,6 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("assert_true")
   enableSuite[GlutenComplexTypeSuite]
   enableSuite[GlutenConfigBehaviorSuite]
-    // Will be fixed by cleaning up ColumnarShuffleExchangeExec.
-    .exclude("SPARK-22160 
spark.sql.execution.rangeExchange.sampleSizePerPartition")
     // Gluten columnar operator will have different number of jobs
     .exclude("SPARK-40211: customize initialNumPartitions for take")
   enableSuite[GlutenCountMinSketchAggQuerySuite]
@@ -737,10 +725,7 @@ class VeloxTestSettings extends BackendTestSettings {
     // Test for vanilla spark codegen, not apply for Gluten
     .exclude("SPARK-43876: Enable fast hashmap for distinct queries")
     .exclude(
-      "zero moments", // [velox does not return NaN]
       "SPARK-26021: NaN and -0.0 in grouping expressions", // NaN case
-      // incorrect result, distinct NaN case
-      "SPARK-32038: NormalizeFloatingNumbers should work on distinct 
aggregate",
       // Replaced with another test.
       "SPARK-19471: AggregationIterator does not initialize the generated 
result projection" +
         " before using it",
@@ -753,9 +738,6 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenDataFrameAsOfJoinSuite]
   enableSuite[GlutenDataFrameComplexTypeSuite]
   enableSuite[GlutenDataFrameFunctionsSuite]
-    // blocked by Velox-5768
-    .exclude("aggregate function - array for primitive type containing null")
-    .exclude("aggregate function - array for non-primitive type")
     // Rewrite this test because Velox sorts rows by key for primitive data 
types, which disrupts the original row sequence.
     .exclude("map_zip_with function - map of primitive types")
     // Vanilla spark throw SparkRuntimeException, gluten throw SparkException.
@@ -765,23 +747,14 @@ class VeloxTestSettings extends BackendTestSettings {
   enableSuite[GlutenDataFrameImplicitsSuite]
   enableSuite[GlutenDataFrameJoinSuite]
   enableSuite[GlutenDataFrameNaFunctionsSuite]
-    .exclude(
-      // NaN case
-      "replace nan with float",
-      "replace nan with double"
-    )
   enableSuite[GlutenDataFramePivotSuite]
-    // substring issue
-    .exclude("pivot with column definition in groupby")
-    // array comparison not supported for values that contain nulls
-    .exclude(
-      "pivot with null and aggregate type not supported by PivotFirst returns 
correct result")
   enableSuite[GlutenDataFrameRangeSuite]
     .exclude("SPARK-20430 Initialize Range parameters in a driver side")
     .excludeByPrefix("Cancelling stage in a query with Range")
   enableSuite[GlutenDataFrameSelfJoinSuite]
   enableSuite[GlutenDataFrameSessionWindowingSuite]
   enableSuite[GlutenDataFrameSetOperationsSuite]
+    // Ignored because it checks Spark's physical operators, not ColumnarUnionExec
     .exclude("SPARK-37371: UnionExec should support columnar if all children 
support columnar")
     // Result depends on the implementation for nondeterministic expression 
rand.
     // Not really an issue.
@@ -797,12 +770,8 @@ class VeloxTestSettings extends BackendTestSettings {
        */
       "repartitionByRange",
       "distributeBy and localSort",
-      // Mismatch when max NaN and infinite value
-      "NaN is greater than all other non-NaN numeric values",
       // Rewrite this test because the describe functions creates unmatched 
plan.
       "describe",
-      // decimal failed ut.
-      "SPARK-22271: mean overflows and returns null for some decimal 
variables",
       // Result depends on the implementation for nondeterministic expression 
rand.
       // Not really an issue.
       "SPARK-9083: sort with non-deterministic expressions"
@@ -824,11 +793,6 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude(
       "SPARK-38237: require all cluster keys for child required distribution 
for window query")
   enableSuite[GlutenDataFrameWindowFramesSuite]
-    // Local window fixes are not added.
-    .exclude("range between should accept int/long values as boundary")
-    .exclude("unbounded preceding/following range between with aggregation")
-    .exclude("sliding range between with aggregation")
-    .exclude("store and retrieve column stats in different time zones")
   enableSuite[GlutenDataFrameWriterV2Suite]
   enableSuite[GlutenDatasetAggregatorSuite]
   enableSuite[GlutenDatasetCacheSuite]
@@ -954,8 +918,6 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("SPARK-51738: IN subquery with struct type")
   enableSuite[GlutenTypedImperativeAggregateSuite]
   enableSuite[GlutenUnwrapCastInComparisonEndToEndSuite]
-    // Rewrite with NaN test cases excluded.
-    .exclude("cases when literal is max")
   enableSuite[GlutenXPathFunctionsSuite]
   enableSuite[GlutenFallbackSuite]
   enableSuite[GlutenHiveSQLQuerySuite]
diff --git 
a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenApproximatePercentileQuerySuite.scala
 
b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenApproximatePercentileQuerySuite.scala
index eb82baa78d..8fb40f1a20 100644
--- 
a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenApproximatePercentileQuerySuite.scala
+++ 
b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenApproximatePercentileQuerySuite.scala
@@ -21,6 +21,6 @@ class GlutenApproximatePercentileQuerySuite
   with GlutenSQLTestsTrait {
 
   override def testFile(fileName: String): String = {
-    Thread.currentThread().getContextClassLoader.getResource(fileName).toString
+    getWorkspaceFilePath("sql", "core", "src", "test", "resources").toString + 
"/" + fileName
   }
 }
diff --git 
a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenDataFramePivotSuite.scala
 
b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenDataFramePivotSuite.scala
index e1b91d7199..fe53c68401 100644
--- 
a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenDataFramePivotSuite.scala
+++ 
b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenDataFramePivotSuite.scala
@@ -16,27 +16,4 @@
  */
 package org.apache.spark.sql
 
-import org.apache.spark.sql.functions._
-
-class GlutenDataFramePivotSuite extends DataFramePivotSuite with 
GlutenSQLTestsTrait {
-
-  // This test is ported from vanilla spark with pos value (1-based) changed 
from 0 to 1 for
-  // substring. In vanilla spark, pos=0 has same effectiveness as pos=1. But 
in velox, pos=0
-  // will return an empty string as substring result.
-  testGluten("pivot with column definition in groupby - using pos=1") {
-    val df = courseSales
-      .groupBy(substring(col("course"), 1, 1).as("foo"))
-      .pivot("year", Seq(2012, 2013))
-      .sum("earnings")
-      .queryExecution
-      .executedPlan
-
-    checkAnswer(
-      courseSales
-        .groupBy(substring(col("course"), 1, 1).as("foo"))
-        .pivot("year", Seq(2012, 2013))
-        .sum("earnings"),
-      Row("d", 15000.0, 48000.0) :: Row("J", 20000.0, 30000.0) :: Nil
-    )
-  }
-}
+class GlutenDataFramePivotSuite extends DataFramePivotSuite with 
GlutenSQLTestsTrait {}
diff --git 
a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenUnwrapCastInComparisonEndToEndSuite.scala
 
b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenUnwrapCastInComparisonEndToEndSuite.scala
index 2dcde94c13..2d34e4de97 100644
--- 
a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenUnwrapCastInComparisonEndToEndSuite.scala
+++ 
b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenUnwrapCastInComparisonEndToEndSuite.scala
@@ -18,41 +18,4 @@ package org.apache.spark.sql
 
 class GlutenUnwrapCastInComparisonEndToEndSuite
   extends UnwrapCastInComparisonEndToEndSuite
-  with GlutenSQLTestsTrait {
-
-  import testImplicits._
-
-  testGluten("cases when literal is max") {
-    withTable(t) {
-      Seq[(Integer, java.lang.Short, java.lang.Float)](
-        (1, 100.toShort, 3.14.toFloat),
-        (2, Short.MaxValue, Float.NaN),
-        (3, Short.MinValue, Float.PositiveInfinity),
-        (4, 0.toShort, Float.MaxValue),
-        (5, null, null))
-        .toDF("c1", "c2", "c3")
-        .write
-        .saveAsTable(t)
-      val df = spark.table(t)
-
-      val lit = Short.MaxValue.toInt
-      checkAnswer(df.where(s"c2 > $lit").select("c1"), Seq.empty)
-      checkAnswer(df.where(s"c2 >= $lit").select("c1"), Row(2))
-      checkAnswer(df.where(s"c2 == $lit").select("c1"), Row(2))
-      checkAnswer(df.where(s"c2 <=> $lit").select("c1"), Row(2))
-      checkAnswer(df.where(s"c2 != $lit").select("c1"), Row(1) :: Row(3) :: 
Row(4) :: Nil)
-      checkAnswer(df.where(s"c2 <= $lit").select("c1"), Row(1) :: Row(2) :: 
Row(3) :: Row(4) :: Nil)
-      checkAnswer(df.where(s"c2 < $lit").select("c1"), Row(1) :: Row(3) :: 
Row(4) :: Nil)
-
-      // NaN is not supported in velox, so unexpected result will be obtained.
-//      checkAnswer(df.where(s"c3 > double('nan')").select("c1"), Seq.empty)
-//      checkAnswer(df.where(s"c3 >= double('nan')").select("c1"), Row(2))
-//      checkAnswer(df.where(s"c3 == double('nan')").select("c1"), Row(2))
-//      checkAnswer(df.where(s"c3 <=> double('nan')").select("c1"), Row(2))
-//    checkAnswer(df.where(s"c3 != double('nan')").select("c1"), Row(1) :: 
Row(3) :: Row(4) :: Nil)
-//      checkAnswer(df.where(s"c3 <= double('nan')").select("c1"),
-//        Row(1) :: Row(2) :: Row(3) :: Row(4) :: Nil)
-//      checkAnswer(df.where(s"c3 < double('nan')").select("c1"), Row(1) :: 
Row(3) :: Row(4) :: Nil)
-    }
-  }
-}
+  with GlutenSQLTestsTrait {}
diff --git 
a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenExchangeSuite.scala
 
b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenExchangeSuite.scala
index bc15153cca..9ff51f84f6 100644
--- 
a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenExchangeSuite.scala
+++ 
b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/execution/GlutenExchangeSuite.scala
@@ -16,57 +16,12 @@
  */
 package org.apache.spark.sql.execution
 
+import org.apache.spark.SparkConf
 import org.apache.spark.sql.GlutenSQLTestsBaseTrait
-import org.apache.spark.sql.execution.exchange.{Exchange, ReusedExchangeExec}
-import org.apache.spark.sql.internal.SQLConf
 
 class GlutenExchangeSuite extends ExchangeSuite with GlutenSQLTestsBaseTrait {
 
-  testGluten("Exchange reuse across the whole plan with shuffle partition 2") {
-    // The shuffle exchange will be inserted between Aggregate
-    // when shuffle partition is > 1.
-    withSQLConf(
-      SQLConf.ADAPTIVE_EXECUTION_ENABLED.key -> "false",
-      SQLConf.SHUFFLE_PARTITIONS.key -> "2") {
-      val df = sql("""
-                     |SELECT
-                     |  (SELECT max(a.key) FROM testData AS a JOIN testData AS 
b ON b.key = a.key),
-                     |  a.key
-                     |FROM testData AS a
-                     |JOIN testData AS b ON b.key = a.key
-      """.stripMargin)
-
-      val plan = df.queryExecution.executedPlan
-
-      val exchangeIds = plan.collectWithSubqueries { case e: Exchange => e.id }
-      val reusedExchangeIds = plan.collectWithSubqueries {
-        case re: ReusedExchangeExec => re.child.id
-      }
-
-      assert(exchangeIds.size == 2, "Whole plan exchange reusing not working 
correctly")
-      assert(reusedExchangeIds.size == 3, "Whole plan exchange reusing not 
working correctly")
-      assert(
-        reusedExchangeIds.forall(exchangeIds.contains(_)),
-        "ReusedExchangeExec should reuse an existing exchange")
-
-      val df2 = sql("""
-                      |SELECT
-                      |  (SELECT min(a.key) FROM testData AS a JOIN testData 
AS b ON b.key = a.key),
-                      |  (SELECT max(a.key) FROM testData AS a JOIN testData2 
AS b ON b.a = a.key)
-      """.stripMargin)
-
-      val plan2 = df2.queryExecution.executedPlan
-
-      val exchangeIds2 = plan2.collectWithSubqueries { case e: Exchange => 
e.id }
-      val reusedExchangeIds2 = plan2.collectWithSubqueries {
-        case re: ReusedExchangeExec => re.child.id
-      }
-
-      assert(exchangeIds2.size == 4, "Whole plan exchange reusing not working 
correctly")
-      assert(reusedExchangeIds2.size == 2, "Whole plan exchange reusing not 
working correctly")
-      assert(
-        reusedExchangeIds2.forall(exchangeIds2.contains(_)),
-        "ReusedExchangeExec should reuse an existing exchange")
-    }
+  override def sparkConf: SparkConf = {
+    super.sparkConf.set("spark.sql.shuffle.partitions", "2")
   }
 }


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to