This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 26b8d57a8 fix: Mark cast from float/double to decimal as incompatible 
(#1372)
26b8d57a8 is described below

commit 26b8d57a87efe7b0e1cbc9a2f9eb1036faa13fa2
Author: Andy Grove <[email protected]>
AuthorDate: Fri Feb 7 12:07:17 2025 -0800

    fix: Mark cast from float/double to decimal as incompatible (#1372)
    
    * add failing test
    
    * Mark cast from float/double to decimal as incompat
    
    * update docs
    
    * update cast tests
    
    * link to issue
    
    * fix regressions
    
    * use unique table name in test
    
    * use withTable
    
    * address feedback
---
 docs/source/user-guide/compatibility.md            |  4 +--
 .../org/apache/comet/expressions/CometCast.scala   |  8 +++--
 .../apache/comet/testing/ParquetGenerator.scala    | 10 +++----
 .../scala/org/apache/comet/CometCastSuite.scala    | 18 +++++++++--
 .../apache/comet/exec/CometAggregateSuite.scala    | 35 ++++++++++++++++++----
 5 files changed, 58 insertions(+), 17 deletions(-)

diff --git a/docs/source/user-guide/compatibility.md 
b/docs/source/user-guide/compatibility.md
index 893326e19..d603c0b99 100644
--- a/docs/source/user-guide/compatibility.md
+++ b/docs/source/user-guide/compatibility.md
@@ -117,7 +117,6 @@ The following cast operations are generally compatible with 
Spark except for the
 | float | integer |  |
 | float | long |  |
 | float | double |  |
-| float | decimal |  |
 | float | string | There can be differences in precision. For example, the 
input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
 | double | boolean |  |
 | double | byte |  |
@@ -125,7 +124,6 @@ The following cast operations are generally compatible with 
Spark except for the
 | double | integer |  |
 | double | long |  |
 | double | float |  |
-| double | decimal |  |
 | double | string | There can be differences in precision. For example, the 
input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
 | decimal | byte |  |
 | decimal | short |  |
@@ -154,6 +152,8 @@ The following cast operations are not compatible with Spark 
for all inputs and a
 |-|-|-|
 | integer | decimal  | No overflow check |
 | long | decimal  | No overflow check |
+| float | decimal  | There can be rounding differences |
+| double | decimal  | There can be rounding differences |
 | string | float  | Does not support inputs ending with 'd' or 'f'. Does not 
support 'inf'. Does not support ANSI mode. |
 | string | double  | Does not support inputs ending with 'd' or 'f'. Does not 
support 'inf'. Does not support ANSI mode. |
 | string | decimal  | Does not support inputs ending with 'd' or 'f'. Does not 
support 'inf'. Does not support ANSI mode. Returns 0.0 instead of null if input 
contains no digits |
diff --git a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala 
b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
index 6b0b10d80..9aa7f7c8b 100644
--- a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
+++ b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
@@ -267,7 +267,9 @@ object CometCast {
     case DataTypes.BooleanType | DataTypes.DoubleType | DataTypes.ByteType | 
DataTypes.ShortType |
         DataTypes.IntegerType | DataTypes.LongType =>
       Compatible()
-    case _: DecimalType => Compatible()
+    case _: DecimalType =>
+      // https://github.com/apache/datafusion-comet/issues/1371
+      Incompatible(Some("There can be rounding differences"))
     case _ => Unsupported
   }
 
@@ -275,7 +277,9 @@ object CometCast {
     case DataTypes.BooleanType | DataTypes.FloatType | DataTypes.ByteType | 
DataTypes.ShortType |
         DataTypes.IntegerType | DataTypes.LongType =>
       Compatible()
-    case _: DecimalType => Compatible()
+    case _: DecimalType =>
+      // https://github.com/apache/datafusion-comet/issues/1371
+      Incompatible(Some("There can be rounding differences"))
     case _ => Unsupported
   }
 
diff --git 
a/spark/src/main/scala/org/apache/comet/testing/ParquetGenerator.scala 
b/spark/src/main/scala/org/apache/comet/testing/ParquetGenerator.scala
index f209cc4c9..acb565ec0 100644
--- a/spark/src/main/scala/org/apache/comet/testing/ParquetGenerator.scala
+++ b/spark/src/main/scala/org/apache/comet/testing/ParquetGenerator.scala
@@ -212,8 +212,8 @@ object ParquetGenerator {
 }
 
 case class DataGenOptions(
-    allowNull: Boolean,
-    generateNegativeZero: Boolean,
-    generateArray: Boolean,
-    generateStruct: Boolean,
-    generateMap: Boolean)
+    allowNull: Boolean = true,
+    generateNegativeZero: Boolean = true,
+    generateArray: Boolean = false,
+    generateStruct: Boolean = false,
+    generateMap: Boolean = false)
diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala 
b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
index 27d8e2357..e2b2ed55a 100644
--- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -348,10 +348,17 @@ class CometCastSuite extends CometTestBase with 
AdaptiveSparkPlanHelper {
     castTest(generateFloats(), DataTypes.DoubleType)
   }
 
-  test("cast FloatType to DecimalType(10,2)") {
+  ignore("cast FloatType to DecimalType(10,2)") {
+    // https://github.com/apache/datafusion-comet/issues/1371
     castTest(generateFloats(), DataTypes.createDecimalType(10, 2))
   }
 
+  test("cast FloatType to DecimalType(10,2) - allow incompat") {
+    withSQLConf(CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key -> "true") {
+      castTest(generateFloats(), DataTypes.createDecimalType(10, 2))
+    }
+  }
+
   test("cast FloatType to StringType") {
     // https://github.com/apache/datafusion-comet/issues/312
     val r = new Random(0)
@@ -401,10 +408,17 @@ class CometCastSuite extends CometTestBase with 
AdaptiveSparkPlanHelper {
     castTest(generateDoubles(), DataTypes.FloatType)
   }
 
-  test("cast DoubleType to DecimalType(10,2)") {
+  ignore("cast DoubleType to DecimalType(10,2)") {
+    // https://github.com/apache/datafusion-comet/issues/1371
     castTest(generateDoubles(), DataTypes.createDecimalType(10, 2))
   }
 
+  test("cast DoubleType to DecimalType(10,2) - allow incompat") {
+    withSQLConf(CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key -> "true") {
+      castTest(generateDoubles(), DataTypes.createDecimalType(10, 2))
+    }
+  }
+
   test("cast DoubleType to StringType") {
     // https://github.com/apache/datafusion-comet/issues/312
     val r = new Random(0)
diff --git 
a/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala 
b/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala
index 6795f3d55..3215a984e 100644
--- a/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala
@@ -32,6 +32,7 @@ import org.apache.spark.sql.internal.SQLConf
 
 import org.apache.comet.CometConf
 import org.apache.comet.CometSparkSessionExtensions.isSpark34Plus
+import org.apache.comet.testing.{DataGenOptions, ParquetGenerator}
 
 /**
  * Test suite dedicated to Comet native aggregate operator
@@ -39,6 +40,27 @@ import 
org.apache.comet.CometSparkSessionExtensions.isSpark34Plus
 class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper {
   import testImplicits._
 
+  test("avg decimal") {
+    withTempDir { dir =>
+      val path = new Path(dir.toURI.toString, "test.parquet")
+      val filename = path.toString
+      val random = new Random(42)
+      withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+        ParquetGenerator.makeParquetFile(random, spark, filename, 10000, 
DataGenOptions())
+      }
+      val tableName = "avg_decimal"
+      withTable(tableName) {
+        val table = spark.read.parquet(filename).coalesce(1)
+        table.createOrReplaceTempView(tableName)
+        // we fall back to Spark for avg on decimal due to the following issue
+        // https://github.com/apache/datafusion-comet/issues/1371
+        // once this is fixed, we should change this test to
+        // checkSparkAnswerAndNumOfAggregates
+        checkSparkAnswer(s"SELECT c1, avg(c7) FROM $tableName GROUP BY c1 
ORDER BY c1")
+      }
+    }
+  }
+
   test("stddev_pop should return NaN for some cases") {
     withSQLConf(
       CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true",
@@ -867,10 +889,11 @@ class CometAggregateSuite extends CometTestBase with 
AdaptiveSparkPlanHelper {
 
     withSQLConf(
       CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true",
+      CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key -> "true",
       CometConf.COMET_SHUFFLE_MODE.key -> "native") {
       Seq(true, false).foreach { dictionaryEnabled =>
         withSQLConf("parquet.enable.dictionary" -> dictionaryEnabled.toString) 
{
-          val table = "t1"
+          val table = s"final_decimal_avg_$dictionaryEnabled"
           withTable(table) {
             sql(s"create table $table(a decimal(38, 37), b INT) using parquet")
             sql(s"insert into $table 
values(-0.0000000000000000000000000000000000002, 1)")
@@ -884,13 +907,13 @@ class CometAggregateSuite extends CometTestBase with 
AdaptiveSparkPlanHelper {
             sql(s"insert into $table 
values(0.13344406545919155429936259114971302408, 5)")
             sql(s"insert into $table 
values(0.13344406545919155429936259114971302408, 5)")
 
-            checkSparkAnswerAndNumOfAggregates("SELECT b , AVG(a) FROM t1 
GROUP BY b", 2)
-            checkSparkAnswerAndNumOfAggregates("SELECT AVG(a) FROM t1", 2)
+            checkSparkAnswerAndNumOfAggregates(s"SELECT b , AVG(a) FROM $table 
GROUP BY b", 2)
+            checkSparkAnswerAndNumOfAggregates(s"SELECT AVG(a) FROM $table", 2)
             checkSparkAnswerAndNumOfAggregates(
-              "SELECT b, MIN(a), MAX(a), COUNT(a), SUM(a), AVG(a) FROM t1 
GROUP BY b",
+              s"SELECT b, MIN(a), MAX(a), COUNT(a), SUM(a), AVG(a) FROM $table 
GROUP BY b",
               2)
             checkSparkAnswerAndNumOfAggregates(
-              "SELECT MIN(a), MAX(a), COUNT(a), SUM(a), AVG(a) FROM t1",
+              s"SELECT MIN(a), MAX(a), COUNT(a), SUM(a), AVG(a) FROM $table",
               2)
           }
         }
@@ -915,7 +938,7 @@ class CometAggregateSuite extends CometTestBase with 
AdaptiveSparkPlanHelper {
     withSQLConf(
       CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true",
       CometConf.COMET_SHUFFLE_MODE.key -> "native") {
-      val table = "t1"
+      val table = "avg_null_handling"
       withTable(table) {
         sql(s"create table $table(a double, b double) using parquet")
         sql(s"insert into $table values(1, 1.0)")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to