This is an automated email from the ASF dual-hosted git repository.
comphead pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 383b56d88 chore: Add tolerance for `ComparisonTool` (#2699)
383b56d88 is described below
commit 383b56d8801a2b63b0c2a0c01cca62655a9096de
Author: Oleks V <[email protected]>
AuthorDate: Thu Nov 6 13:14:34 2025 -0800
chore: Add tolerance for `ComparisonTool` (#2699)
* chore: Add a configurable tolerance to `ComparisonTool` that sets the error threshold for
floating-point comparisons
---
.../scala/org/apache/comet/fuzz/ComparisonTool.scala | 10 +++++++---
.../main/scala/org/apache/comet/fuzz/QueryRunner.scala | 17 +++++++++--------
2 files changed, 16 insertions(+), 11 deletions(-)
diff --git
a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/ComparisonTool.scala
b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/ComparisonTool.scala
index 03b6f7334..055ea6553 100644
--- a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/ComparisonTool.scala
+++ b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/ComparisonTool.scala
@@ -31,6 +31,8 @@ class ComparisonToolConf(arguments: Seq[String]) extends
ScallopConf(arguments)
opt[String](required = true, descr = "Folder with Spark produced results
in Parquet format")
val inputCometFolder: ScallopOption[String] =
opt[String](required = true, descr = "Folder with Comet produced results
in Parquet format")
+ val tolerance: ScallopOption[Double] =
+ opt[Double](default = Some(0.000002), descr = "Tolerance for floating
point comparisons")
}
addSubcommand(compareParquet)
verify()
@@ -49,7 +51,8 @@ object ComparisonTool {
compareParquetFolders(
spark,
conf.compareParquet.inputSparkFolder(),
- conf.compareParquet.inputCometFolder())
+ conf.compareParquet.inputCometFolder(),
+ conf.compareParquet.tolerance())
case _ =>
// scalastyle:off println
@@ -62,7 +65,8 @@ object ComparisonTool {
private def compareParquetFolders(
spark: SparkSession,
sparkFolderPath: String,
- cometFolderPath: String): Unit = {
+ cometFolderPath: String,
+ tolerance: Double): Unit = {
val output = QueryRunner.createOutputMdFile()
@@ -115,7 +119,7 @@ object ComparisonTool {
val cometRows =
cometDf.orderBy(cometDf.columns.map(functions.col): _*).collect()
// Compare the results
- if (QueryComparison.assertSameRows(sparkRows, cometRows, output)) {
+ if (QueryComparison.assertSameRows(sparkRows, cometRows, output,
tolerance)) {
output.write(s"Subfolder $subfolderName: ${sparkRows.length}
rows matched\n\n")
} else {
// Output schema if dataframes are not equal
diff --git
a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala
b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala
index 23bfbc1c0..dc7189c53 100644
--- a/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala
+++ b/fuzz-testing/src/main/scala/org/apache/comet/fuzz/QueryRunner.scala
@@ -148,7 +148,8 @@ object QueryComparison {
def assertSameRows(
sparkRows: Array[Row],
cometRows: Array[Row],
- output: BufferedWriter): Boolean = {
+ output: BufferedWriter,
+ tolerance: Double = 0.000001): Boolean = {
if (sparkRows.length == cometRows.length) {
var i = 0
while (i < sparkRows.length) {
@@ -164,7 +165,7 @@ object QueryComparison {
assert(l.length == r.length)
for (j <- 0 until l.length) {
- if (!same(l(j), r(j))) {
+ if (!same(l(j), r(j), tolerance)) {
output.write(s"First difference at row $i:\n")
output.write("Spark: `" + formatRow(l) + "`\n")
output.write("Comet: `" + formatRow(r) + "`\n")
@@ -186,7 +187,7 @@ object QueryComparison {
true
}
- private def same(l: Any, r: Any): Boolean = {
+ private def same(l: Any, r: Any, tolerance: Double): Boolean = {
if (l == null || r == null) {
return l == null && r == null
}
@@ -195,20 +196,20 @@ object QueryComparison {
case (a: Float, b: Float) if a.isNegInfinity => b.isNegInfinity
case (a: Float, b: Float) if a.isInfinity => b.isInfinity
case (a: Float, b: Float) if a.isNaN => b.isNaN
- case (a: Float, b: Float) => (a - b).abs <= 0.000001f
+ case (a: Float, b: Float) => (a - b).abs <= tolerance
case (a: Double, b: Double) if a.isPosInfinity => b.isPosInfinity
case (a: Double, b: Double) if a.isNegInfinity => b.isNegInfinity
case (a: Double, b: Double) if a.isInfinity => b.isInfinity
case (a: Double, b: Double) if a.isNaN => b.isNaN
- case (a: Double, b: Double) => (a - b).abs <= 0.000001
+ case (a: Double, b: Double) => (a - b).abs <= tolerance
case (a: Array[_], b: Array[_]) =>
- a.length == b.length && a.zip(b).forall(x => same(x._1, x._2))
+ a.length == b.length && a.zip(b).forall(x => same(x._1, x._2,
tolerance))
case (a: mutable.WrappedArray[_], b: mutable.WrappedArray[_]) =>
- a.length == b.length && a.zip(b).forall(x => same(x._1, x._2))
+ a.length == b.length && a.zip(b).forall(x => same(x._1, x._2,
tolerance))
case (a: Row, b: Row) =>
val aa = a.toSeq
val bb = b.toSeq
- aa.length == bb.length && aa.zip(bb).forall(x => same(x._1, x._2))
+ aa.length == bb.length && aa.zip(bb).forall(x => same(x._1, x._2,
tolerance))
case (a, b) => a == b
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]