This is an automated email from the ASF dual-hosted git repository.

yuanzhou pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/incubator-gluten.git


The following commit(s) were added to refs/heads/main by this push:
     new 49389cd05e [GLUTEN-11088][VL] Fix Spark4.0 varchar type check suites 
(#11202)
49389cd05e is described below

commit 49389cd05ea07356f71bfdfe660410604c1461ea
Author: Jin Chengcheng <[email protected]>
AuthorDate: Thu Nov 27 11:16:41 2025 +0000

    [GLUTEN-11088][VL] Fix Spark4.0 varchar type check suites (#11202)
    
    Use one test to make sure the native code is really called; override the 
function assertLengthCheckFailure to assert that the result is correct.
    Spark 4.0 did a refactor that moved the test result check to a public function, 
so we can reuse the tests by overriding the check function in the test suites.
---
 .../gluten/utils/velox/VeloxTestSettings.scala     |  12 +--
 .../spark/sql/GlutenCharVarcharTestSuite.scala     | 115 ++++++---------------
 2 files changed, 33 insertions(+), 94 deletions(-)

diff --git 
a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
 
b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
index 07437631f9..ce10ddec49 100644
--- 
a/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
+++ 
b/gluten-ut/spark40/src/test/scala/org/apache/gluten/utils/velox/VeloxTestSettings.scala
@@ -713,16 +713,8 @@ class VeloxTestSettings extends BackendTestSettings {
     .exclude("InMemoryRelation statistics")
     // Extra ColumnarToRow is needed to transform vanilla columnar data to 
gluten columnar data.
     .exclude("SPARK-37369: Avoid redundant ColumnarToRow transition on 
InMemoryTableScan")
-  // TODO: fix in Spark-4.0
-  // enableSuite[GlutenFileSourceCharVarcharTestSuite]
-  //   .exclude("length check for input string values: nested in array")
-  //   .exclude("length check for input string values: nested in array")
-  //   .exclude("length check for input string values: nested in map key")
-  //   .exclude("length check for input string values: nested in map value")
-  //   .exclude("length check for input string values: nested in both map key 
and value")
-  //   .exclude("length check for input string values: nested in array of 
struct")
-  //   .exclude("length check for input string values: nested in array of 
array")
-  // enableSuite[GlutenDSV2CharVarcharTestSuite]
+  enableSuite[GlutenFileSourceCharVarcharTestSuite]
+  enableSuite[GlutenDSV2CharVarcharTestSuite]
   enableSuite[GlutenColumnExpressionSuite]
     // Velox raise_error('errMsg') throws a velox_user_error exception with 
the message 'errMsg'.
     // The final caught Spark exception's getCause().getMessage() contains 
'errMsg' but does not
diff --git 
a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenCharVarcharTestSuite.scala
 
b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenCharVarcharTestSuite.scala
index ce2f1b465e..ed40918de6 100644
--- 
a/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenCharVarcharTestSuite.scala
+++ 
b/gluten-ut/spark40/src/test/scala/org/apache/spark/sql/GlutenCharVarcharTestSuite.scala
@@ -16,32 +16,41 @@
  */
 package org.apache.spark.sql
 
-import org.apache.spark.SparkException
+import org.apache.spark.{SparkException, SparkRuntimeException, SparkThrowable}
+
+trait GlutenCharVarcharTestSuite extends CharVarcharTestSuite with 
GlutenSQLTestsTrait {
+  protected val ERROR_MESSAGE =
+    "Exceeds char/varchar type length limitation: 5"
+
+  protected val VELOX_ERROR_MESSAGE =
+    "Exceeds allowed length limitation: 5"
+
+  override def assertLengthCheckFailure(func: () => Unit): Unit = {
+    val e = intercept[SparkThrowable](func())
+    e match {
+      // Spark throws exception
+      case _: SparkRuntimeException =>
+        checkError(
+          exception = e,
+          condition = "EXCEED_LIMIT_LENGTH",
+          parameters = Map("limit" -> "5")
+        )
+      // Gluten throws exception. but sometimes, Spark exception is wrapped in 
GlutenException.
+      case e: SparkException =>
+        assert(e.getMessage.contains(VELOX_ERROR_MESSAGE) || 
e.getMessage.contains(ERROR_MESSAGE))
+      case _ => throw new RuntimeException(s"Unexpected exception: $e")
+    }
+  }
+}
 
 class GlutenFileSourceCharVarcharTestSuite
   extends FileSourceCharVarcharTestSuite
-  with GlutenSQLTestsTrait {
+  with GlutenCharVarcharTestSuite {
   private def testTableWrite(f: String => Unit): Unit = {
     withTable("t")(f("char"))
     withTable("t")(f("varchar"))
   }
 
-  private val ERROR_MESSAGE =
-    "Exceeds char/varchar type length limitation: 5"
-
-  testGluten("length check for input string values: nested in struct") {
-    testTableWrite {
-      typeName =>
-        sql(s"CREATE TABLE t(c STRUCT<c: $typeName(5)>) USING $format")
-        sql("INSERT INTO t SELECT struct(null)")
-        checkAnswer(spark.table("t"), Row(Row(null)))
-        val e = intercept[RuntimeException] {
-          sql("INSERT INTO t SELECT struct('123456')")
-        }
-        assert(e.getMessage.contains(ERROR_MESSAGE))
-    }
-  }
-
   testGluten("length check for input string values: nested in array") {
     testTableWrite {
       typeName =>
@@ -51,73 +60,11 @@ class GlutenFileSourceCharVarcharTestSuite
         val e = intercept[SparkException] {
           sql("INSERT INTO t VALUES (array('a', '123456'))")
         }
-        assert(e.getMessage.contains(ERROR_MESSAGE))
-    }
-  }
-
-  testGluten("length check for input string values: nested in map key") {
-    testTableWrite {
-      typeName =>
-        sql(s"CREATE TABLE t(c MAP<$typeName(5), STRING>) USING $format")
-        val e = intercept[SparkException](sql("INSERT INTO t VALUES 
(map('123456', 'a'))"))
-        assert(e.getMessage.contains(ERROR_MESSAGE))
-    }
-  }
-
-  testGluten("length check for input string values: nested in map value") {
-    testTableWrite {
-      typeName =>
-        sql(s"CREATE TABLE t(c MAP<STRING, $typeName(5)>) USING $format")
-        sql("INSERT INTO t VALUES (map('a', null))")
-        checkAnswer(spark.table("t"), Row(Map("a" -> null)))
-        val e = intercept[SparkException](sql("INSERT INTO t VALUES (map('a', 
'123456'))"))
-        assert(e.getMessage.contains(ERROR_MESSAGE))
-    }
-  }
-
-  testGluten("length check for input string values: nested in both map key and 
value") {
-    testTableWrite {
-      typeName =>
-        sql(s"CREATE TABLE t(c MAP<$typeName(5), $typeName(5)>) USING $format")
-        val e1 = intercept[SparkException](sql("INSERT INTO t VALUES 
(map('123456', 'a'))"))
-        assert(e1.getMessage.contains(ERROR_MESSAGE))
-        val e2 = intercept[SparkException](sql("INSERT INTO t VALUES (map('a', 
'123456'))"))
-        assert(e2.getMessage.contains(ERROR_MESSAGE))
-    }
-  }
-
-  testGluten("length check for input string values: nested in struct of 
array") {
-    testTableWrite {
-      typeName =>
-        sql(s"CREATE TABLE t(c STRUCT<c: ARRAY<$typeName(5)>>) USING $format")
-        sql("INSERT INTO t SELECT struct(array(null))")
-        checkAnswer(spark.table("t"), Row(Row(Seq(null))))
-        val e = intercept[SparkException](sql("INSERT INTO t SELECT 
struct(array('123456'))"))
-        assert(e.getMessage.contains(ERROR_MESSAGE))
-    }
-  }
-
-  testGluten("length check for input string values: nested in array of 
struct") {
-    testTableWrite {
-      typeName =>
-        sql(s"CREATE TABLE t(c ARRAY<STRUCT<c: $typeName(5)>>) USING $format")
-        sql("INSERT INTO t VALUES (array(struct(null)))")
-        checkAnswer(spark.table("t"), Row(Seq(Row(null))))
-        val e = intercept[SparkException](sql("INSERT INTO t VALUES 
(array(struct('123456')))"))
-        assert(e.getMessage.contains(ERROR_MESSAGE))
-    }
-  }
-
-  testGluten("length check for input string values: nested in array of array") 
{
-    testTableWrite {
-      typeName =>
-        sql(s"CREATE TABLE t(c ARRAY<ARRAY<$typeName(5)>>) USING $format")
-        sql("INSERT INTO t VALUES (array(array(null)))")
-        checkAnswer(spark.table("t"), Row(Seq(Seq(null))))
-        val e = intercept[SparkException](sql("INSERT INTO t VALUES 
(array(array('123456')))"))
-        assert(e.getMessage.contains(ERROR_MESSAGE))
+        assert(e.getMessage.contains(VELOX_ERROR_MESSAGE))
     }
   }
 }
 
-class GlutenDSV2CharVarcharTestSuite extends DSV2CharVarcharTestSuite with 
GlutenSQLTestsTrait {}
+class GlutenDSV2CharVarcharTestSuite
+  extends DSV2CharVarcharTestSuite
+  with GlutenCharVarcharTestSuite {}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to