This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 26b8d57a8 fix: Mark cast from float/double to decimal as incompatible
(#1372)
26b8d57a8 is described below
commit 26b8d57a87efe7b0e1cbc9a2f9eb1036faa13fa2
Author: Andy Grove <[email protected]>
AuthorDate: Fri Feb 7 12:07:17 2025 -0800
fix: Mark cast from float/double to decimal as incompatible (#1372)
* add failing test
* Mark cast from float/double to decimal as incompat
* update docs
* update cast tests
* link to issue
* fix regressions
* use unique table name in test
* use withTable
* address feedback
---
docs/source/user-guide/compatibility.md | 4 +--
.../org/apache/comet/expressions/CometCast.scala | 8 +++--
.../apache/comet/testing/ParquetGenerator.scala | 10 +++----
.../scala/org/apache/comet/CometCastSuite.scala | 18 +++++++++--
.../apache/comet/exec/CometAggregateSuite.scala | 35 ++++++++++++++++++----
5 files changed, 58 insertions(+), 17 deletions(-)
diff --git a/docs/source/user-guide/compatibility.md
b/docs/source/user-guide/compatibility.md
index 893326e19..d603c0b99 100644
--- a/docs/source/user-guide/compatibility.md
+++ b/docs/source/user-guide/compatibility.md
@@ -117,7 +117,6 @@ The following cast operations are generally compatible with
Spark except for the
| float | integer | |
| float | long | |
| float | double | |
-| float | decimal | |
| float | string | There can be differences in precision. For example, the
input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
| double | boolean | |
| double | byte | |
@@ -125,7 +124,6 @@ The following cast operations are generally compatible with
Spark except for the
| double | integer | |
| double | long | |
| double | float | |
-| double | decimal | |
| double | string | There can be differences in precision. For example, the
input "1.4E-45" will produce 1.0E-45 instead of 1.4E-45 |
| decimal | byte | |
| decimal | short | |
@@ -154,6 +152,8 @@ The following cast operations are not compatible with Spark
for all inputs and a
|-|-|-|
| integer | decimal | No overflow check |
| long | decimal | No overflow check |
+| float | decimal | There can be rounding differences |
+| double | decimal | There can be rounding differences |
| string | float | Does not support inputs ending with 'd' or 'f'. Does not
support 'inf'. Does not support ANSI mode. |
| string | double | Does not support inputs ending with 'd' or 'f'. Does not
support 'inf'. Does not support ANSI mode. |
| string | decimal | Does not support inputs ending with 'd' or 'f'. Does not
support 'inf'. Does not support ANSI mode. Returns 0.0 instead of null if input
contains no digits |
diff --git a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
index 6b0b10d80..9aa7f7c8b 100644
--- a/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
+++ b/spark/src/main/scala/org/apache/comet/expressions/CometCast.scala
@@ -267,7 +267,9 @@ object CometCast {
case DataTypes.BooleanType | DataTypes.DoubleType | DataTypes.ByteType |
DataTypes.ShortType |
DataTypes.IntegerType | DataTypes.LongType =>
Compatible()
- case _: DecimalType => Compatible()
+ case _: DecimalType =>
+ // https://github.com/apache/datafusion-comet/issues/1371
+ Incompatible(Some("There can be rounding differences"))
case _ => Unsupported
}
@@ -275,7 +277,9 @@ object CometCast {
case DataTypes.BooleanType | DataTypes.FloatType | DataTypes.ByteType |
DataTypes.ShortType |
DataTypes.IntegerType | DataTypes.LongType =>
Compatible()
- case _: DecimalType => Compatible()
+ case _: DecimalType =>
+ // https://github.com/apache/datafusion-comet/issues/1371
+ Incompatible(Some("There can be rounding differences"))
case _ => Unsupported
}
diff --git
a/spark/src/main/scala/org/apache/comet/testing/ParquetGenerator.scala
b/spark/src/main/scala/org/apache/comet/testing/ParquetGenerator.scala
index f209cc4c9..acb565ec0 100644
--- a/spark/src/main/scala/org/apache/comet/testing/ParquetGenerator.scala
+++ b/spark/src/main/scala/org/apache/comet/testing/ParquetGenerator.scala
@@ -212,8 +212,8 @@ object ParquetGenerator {
}
case class DataGenOptions(
- allowNull: Boolean,
- generateNegativeZero: Boolean,
- generateArray: Boolean,
- generateStruct: Boolean,
- generateMap: Boolean)
+ allowNull: Boolean = true,
+ generateNegativeZero: Boolean = true,
+ generateArray: Boolean = false,
+ generateStruct: Boolean = false,
+ generateMap: Boolean = false)
diff --git a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
index 27d8e2357..e2b2ed55a 100644
--- a/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometCastSuite.scala
@@ -348,10 +348,17 @@ class CometCastSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
castTest(generateFloats(), DataTypes.DoubleType)
}
- test("cast FloatType to DecimalType(10,2)") {
+ ignore("cast FloatType to DecimalType(10,2)") {
+ // https://github.com/apache/datafusion-comet/issues/1371
castTest(generateFloats(), DataTypes.createDecimalType(10, 2))
}
+ test("cast FloatType to DecimalType(10,2) - allow incompat") {
+ withSQLConf(CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key -> "true") {
+ castTest(generateFloats(), DataTypes.createDecimalType(10, 2))
+ }
+ }
+
test("cast FloatType to StringType") {
// https://github.com/apache/datafusion-comet/issues/312
val r = new Random(0)
@@ -401,10 +408,17 @@ class CometCastSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
castTest(generateDoubles(), DataTypes.FloatType)
}
- test("cast DoubleType to DecimalType(10,2)") {
+ ignore("cast DoubleType to DecimalType(10,2)") {
+ // https://github.com/apache/datafusion-comet/issues/1371
castTest(generateDoubles(), DataTypes.createDecimalType(10, 2))
}
+ test("cast DoubleType to DecimalType(10,2) - allow incompat") {
+ withSQLConf(CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key -> "true") {
+ castTest(generateDoubles(), DataTypes.createDecimalType(10, 2))
+ }
+ }
+
test("cast DoubleType to StringType") {
// https://github.com/apache/datafusion-comet/issues/312
val r = new Random(0)
diff --git
a/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala
b/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala
index 6795f3d55..3215a984e 100644
--- a/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/exec/CometAggregateSuite.scala
@@ -32,6 +32,7 @@ import org.apache.spark.sql.internal.SQLConf
import org.apache.comet.CometConf
import org.apache.comet.CometSparkSessionExtensions.isSpark34Plus
+import org.apache.comet.testing.{DataGenOptions, ParquetGenerator}
/**
* Test suite dedicated to Comet native aggregate operator
@@ -39,6 +40,27 @@ import
org.apache.comet.CometSparkSessionExtensions.isSpark34Plus
class CometAggregateSuite extends CometTestBase with AdaptiveSparkPlanHelper {
import testImplicits._
+ test("avg decimal") {
+ withTempDir { dir =>
+ val path = new Path(dir.toURI.toString, "test.parquet")
+ val filename = path.toString
+ val random = new Random(42)
+ withSQLConf(CometConf.COMET_ENABLED.key -> "false") {
+ ParquetGenerator.makeParquetFile(random, spark, filename, 10000,
DataGenOptions())
+ }
+ val tableName = "avg_decimal"
+ withTable(tableName) {
+ val table = spark.read.parquet(filename).coalesce(1)
+ table.createOrReplaceTempView(tableName)
+ // we fall back to Spark for avg on decimal due to the following issue
+ // https://github.com/apache/datafusion-comet/issues/1371
+ // once this is fixed, we should change this test to
+ // checkSparkAnswerAndNumOfAggregates
+ checkSparkAnswer(s"SELECT c1, avg(c7) FROM $tableName GROUP BY c1
ORDER BY c1")
+ }
+ }
+ }
+
test("stddev_pop should return NaN for some cases") {
withSQLConf(
CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true",
@@ -867,10 +889,11 @@ class CometAggregateSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
withSQLConf(
CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true",
+ CometConf.COMET_CAST_ALLOW_INCOMPATIBLE.key -> "true",
CometConf.COMET_SHUFFLE_MODE.key -> "native") {
Seq(true, false).foreach { dictionaryEnabled =>
withSQLConf("parquet.enable.dictionary" -> dictionaryEnabled.toString)
{
- val table = "t1"
+ val table = s"final_decimal_avg_$dictionaryEnabled"
withTable(table) {
sql(s"create table $table(a decimal(38, 37), b INT) using parquet")
sql(s"insert into $table
values(-0.0000000000000000000000000000000000002, 1)")
@@ -884,13 +907,13 @@ class CometAggregateSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
sql(s"insert into $table
values(0.13344406545919155429936259114971302408, 5)")
sql(s"insert into $table
values(0.13344406545919155429936259114971302408, 5)")
- checkSparkAnswerAndNumOfAggregates("SELECT b , AVG(a) FROM t1
GROUP BY b", 2)
- checkSparkAnswerAndNumOfAggregates("SELECT AVG(a) FROM t1", 2)
+ checkSparkAnswerAndNumOfAggregates(s"SELECT b , AVG(a) FROM $table
GROUP BY b", 2)
+ checkSparkAnswerAndNumOfAggregates(s"SELECT AVG(a) FROM $table", 2)
checkSparkAnswerAndNumOfAggregates(
- "SELECT b, MIN(a), MAX(a), COUNT(a), SUM(a), AVG(a) FROM t1
GROUP BY b",
+ s"SELECT b, MIN(a), MAX(a), COUNT(a), SUM(a), AVG(a) FROM $table
GROUP BY b",
2)
checkSparkAnswerAndNumOfAggregates(
- "SELECT MIN(a), MAX(a), COUNT(a), SUM(a), AVG(a) FROM t1",
+ s"SELECT MIN(a), MAX(a), COUNT(a), SUM(a), AVG(a) FROM $table",
2)
}
}
@@ -915,7 +938,7 @@ class CometAggregateSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
withSQLConf(
CometConf.COMET_EXEC_SHUFFLE_ENABLED.key -> "true",
CometConf.COMET_SHUFFLE_MODE.key -> "native") {
- val table = "t1"
+ val table = "avg_null_handling"
withTable(table) {
sql(s"create table $table(a double, b double) using parquet")
sql(s"insert into $table values(1, 1.0)")
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]