advancedxy commented on code in PR #263:
URL: https://github.com/apache/arrow-datafusion-comet/pull/263#discussion_r1569832400


##########
spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala:
##########
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.{Files, Paths}
+
+import scala.collection.mutable
+
+import org.scalatest.Ignore
+import org.scalatest.exceptions.TestFailedException
+
+import org.apache.spark.sql.CometTestBase
+import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
+
+/**
+ * Manual test to calculate the coverage of Spark built-in expressions supported by Comet.
+ *
+ * The test will update the files doc/spark_builtin_expr_coverage.txt and
+ * doc/spark_builtin_expr_coverage_agg.txt.
+ */
+
+@Ignore
+class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanHelper {
+
+  import testImplicits._
+
+  private val rawCoverageFilePath = "doc/spark_builtin_expr_coverage.txt"

Review Comment:
   On second thought, I think we should add the Spark version to the file name. More functions will be added in new Spark versions, so it would be helpful to indicate how many functions are supported in each Spark version.
   
   This can be done in a follow-up, though.
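   
   For illustration, a minimal sketch of how the suite could derive version-specific file names. It assumes keying the name on the major.minor Spark version via `org.apache.spark.SPARK_VERSION`; the suffix scheme is made up and open for discussion:
   
   ```scala
   import org.apache.spark.SPARK_VERSION
   
   // e.g. "3.4" for Spark 3.4.x; the naming scheme below is illustrative only
   private val sparkMajorMinor = SPARK_VERSION.split('.').take(2).mkString(".")
   private val rawCoverageFilePath = s"doc/spark_builtin_expr_coverage-$sparkMajorMinor.txt"
   private val aggCoverageFilePath = s"doc/spark_builtin_expr_coverage_agg-$sparkMajorMinor.txt"
   ```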



##########
spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala:
##########
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.{Files, Paths}
+
+import scala.collection.mutable
+
+import org.scalatest.Ignore
+import org.scalatest.exceptions.TestFailedException
+
+import org.apache.spark.sql.CometTestBase
+import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
+
+/**
+ * Manual test to calculate the coverage of Spark built-in expressions supported by Comet.
+ *
+ * The test will update the files doc/spark_builtin_expr_coverage.txt and
+ * doc/spark_builtin_expr_coverage_agg.txt.
+ */
+
+@Ignore
+class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanHelper {
+
+  import testImplicits._
+
+  private val rawCoverageFilePath = "doc/spark_builtin_expr_coverage.txt"
+  private val aggCoverageFilePath = "doc/spark_builtin_expr_coverage_agg.txt"
+
+  test("Test Spark builtin expressions coverage") {
+    val queryPattern = """(?i)SELECT (.+?);""".r
+    val valuesPattern = """(?i)FROM VALUES(.+?);""".r
+    val selectPattern = """(?i)SELECT(.+?)FROM""".r
+    val builtinExamplesMap = spark.sessionState.functionRegistry
+      .listFunction()
+      .map(spark.sessionState.catalog.lookupFunctionInfo(_))
+      .filter(_.getSource.toLowerCase == "built-in")
+      // exclude Spark streaming functions; Comet has no plans to support streaming in the near future
+      .filter(f =>
+        !List("window", "session_window", "window_time").contains(f.getName.toLowerCase))
+      .map(f => {
+        val selectRows = queryPattern.findAllMatchIn(f.getExamples).map(_.group(0)).toList
+        (f.getName, selectRows.filter(_.nonEmpty))
+      })
+      .toMap
+
+    // key - function name
+    // value - list of results showing whether the function is supported by Comet
+    val resultsMap = new mutable.HashMap[String, CoverageResult]()
+
+    builtinExamplesMap.foreach {
+      case (funcName, q :: _) =>
+        val queryResult =
+          try {
+            // Example with predefined values
+            // e.g. SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col)
+            // a better option would probably be to parse the query and iterate through
+            // expressions, but this is an ad hoc coverage test
+            if (q.toLowerCase.contains(" from values")) {
+              val select = selectPattern.findFirstMatchIn(q).map(_.group(0))
+              val values = valuesPattern.findFirstMatchIn(q).map(_.group(0))
+              (select, values) match {
+                case (Some(s), Some(v)) =>
+                  testSingleLineQuery(s"select * $v", s"$s tbl")
+
+                case _ =>
+                  resultsMap.put(
+                    funcName,
+                    CoverageResult("FAILED", Seq((q, "Cannot parse 
properly"))))
+              }
+            } else {
+              // Plain example like SELECT cos(0);
+              testSingleLineQuery(
+                "select 'dummy' x",
+                s"${q.dropRight(1)}, x from tbl",
+                excludedOptimizerRules =
+                  Some("org.apache.spark.sql.catalyst.optimizer.ConstantFolding"))
+            }
+            CoverageResult(CoverageResultStatus.Passed.toString, Seq((q, "OK")))
+          } catch {
+            case e: TestFailedException
+                if e.message.getOrElse("").contains("Expected only Comet native operators") =>
+              CoverageResult(CoverageResultStatus.Failed.toString, Seq((q, "Unsupported")))
+            case e if e.getMessage.contains("CometNativeException") =>
+              CoverageResult(
+                CoverageResultStatus.Failed.toString,
+                Seq((q, "Failed on native side")))
+            case _ =>
+              CoverageResult(
+                CoverageResultStatus.Failed.toString,
+                Seq((q, "Failed on something else. Check query manually")))
+          }
+        resultsMap.put(funcName, queryResult)
+
+      case (funcName, List()) =>
+        resultsMap.put(
+          funcName,
+          CoverageResult(
+            CoverageResultStatus.Skipped.toString,
+            Seq(("", "No examples found in 
spark.sessionState.functionRegistry"))))
+    }
+
+    // TODO: convert results into HTML
+    resultsMap.toSeq.toDF("name", "details").createOrReplaceTempView("t")
+    val str_agg = showString(
+      spark.sql(
+        "select result, d._2 as details, count(1) cnt from (select name, 
t.details.result, explode_outer(t.details.details) as d from t) group by 1, 2 
order by 1"),
+      1000,
+      0)
+    Files.write(Paths.get(aggCoverageFilePath), str_agg.getBytes(StandardCharsets.UTF_8))
+
+    val str = showString(spark.sql("select * from t order by 1"), 1000, 0)
+    Files.write(Paths.get(rawCoverageFilePath), str.getBytes(StandardCharsets.UTF_8))

Review Comment:
   Ah, I see you filed a new issue. It can be addressed in a follow-up PR then.



##########
spark/src/test/scala/org/apache/comet/CometExpressionCoverageSuite.scala:
##########
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.comet
+
+import java.nio.charset.StandardCharsets
+import java.nio.file.{Files, Paths}
+
+import scala.collection.mutable
+
+import org.scalatest.Ignore
+import org.scalatest.exceptions.TestFailedException
+
+import org.apache.spark.sql.CometTestBase
+import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper
+
+/**
+ * Manual test to calculate the coverage of Spark built-in expressions supported by Comet.
+ *
+ * The test will update the files doc/spark_builtin_expr_coverage.txt and
+ * doc/spark_builtin_expr_coverage_agg.txt.
+ */
+
+@Ignore
+class CometExpressionCoverageSuite extends CometTestBase with AdaptiveSparkPlanHelper {
+
+  import testImplicits._
+
+  private val rawCoverageFilePath = "doc/spark_builtin_expr_coverage.txt"
+  private val aggCoverageFilePath = "doc/spark_builtin_expr_coverage_agg.txt"
+
+  test("Test Spark builtin expressions coverage") {
+    val queryPattern = """(?i)SELECT (.+?);""".r
+    val valuesPattern = """(?i)FROM VALUES(.+?);""".r
+    val selectPattern = """(?i)SELECT(.+?)FROM""".r
+    val builtinExamplesMap = spark.sessionState.functionRegistry
+      .listFunction()
+      .map(spark.sessionState.catalog.lookupFunctionInfo(_))
+      .filter(_.getSource.toLowerCase == "built-in")
+      // exclude Spark streaming functions; Comet has no plans to support streaming in the near future
+      .filter(f =>
+        !List("window", "session_window", "window_time").contains(f.getName.toLowerCase))
+      .map(f => {
+        val selectRows = queryPattern.findAllMatchIn(f.getExamples).map(_.group(0)).toList
+        (f.getName, selectRows.filter(_.nonEmpty))
+      })
+      .toMap
+
+    // key - function name
+    // value - list of results showing whether the function is supported by Comet
+    val resultsMap = new mutable.HashMap[String, CoverageResult]()
+
+    builtinExamplesMap.foreach {
+      case (funcName, q :: _) =>
+        val queryResult =
+          try {
+            // Example with predefined values
+            // e.g. SELECT bit_xor(col) FROM VALUES (3), (5) AS tab(col)
+            // a better option would probably be to parse the query and iterate through
+            // expressions, but this is an ad hoc coverage test
+            if (q.toLowerCase.contains(" from values")) {
+              val select = selectPattern.findFirstMatchIn(q).map(_.group(0))
+              val values = valuesPattern.findFirstMatchIn(q).map(_.group(0))
+              (select, values) match {
+                case (Some(s), Some(v)) =>
+                  testSingleLineQuery(s"select * $v", s"$s tbl")
+
+                case _ =>
+                  resultsMap.put(
+                    funcName,
+                    CoverageResult("FAILED", Seq((q, "Cannot parse 
properly"))))
+              }
+            } else {
+              // Plain example like SELECT cos(0);
+              testSingleLineQuery(
+                "select 'dummy' x",
+                s"${q.dropRight(1)}, x from tbl",
+                excludedOptimizerRules =
+                  Some("org.apache.spark.sql.catalyst.optimizer.ConstantFolding"))
+            }
+            CoverageResult(CoverageResultStatus.Passed.toString, Seq((q, "OK")))
+          } catch {
+            case e: TestFailedException
+                if e.message.getOrElse("").contains("Expected only Comet native operators") =>
+              CoverageResult(CoverageResultStatus.Failed.toString, Seq((q, "Unsupported")))
+            case e if e.getMessage.contains("CometNativeException") =>
+              CoverageResult(
+                CoverageResultStatus.Failed.toString,
+                Seq((q, "Failed on native side")))
+            case _ =>
+              CoverageResult(
+                CoverageResultStatus.Failed.toString,
+                Seq((q, "Failed on something else. Check query manually")))
+          }
+        resultsMap.put(funcName, queryResult)
+
+      case (funcName, List()) =>
+        resultsMap.put(
+          funcName,
+          CoverageResult(
+            CoverageResultStatus.Skipped.toString,
+            Seq(("", "No examples found in 
spark.sessionState.functionRegistry"))))
+    }
+
+    // TODO: convert results into HTML
+    resultsMap.toSeq.toDF("name", "details").createOrReplaceTempView("t")
+    val str_agg = showString(
+      spark.sql(
+        "select result, d._2 as details, count(1) cnt from (select name, 
t.details.result, explode_outer(t.details.details) as d from t) group by 1, 2 
order by 1"),
+      1000,
+      0)
+    Files.write(Paths.get(aggCoverageFilePath), str_agg.getBytes(StandardCharsets.UTF_8))
+
+    val str = showString(spark.sql("select * from t order by 1"), 1000, 0)
+    Files.write(Paths.get(rawCoverageFilePath), str.getBytes(StandardCharsets.UTF_8))

Review Comment:
   I second @viirya's point, https://github.com/apache/arrow-datafusion-comet/pull/263#discussion_r1566815379. It would be better to put the aggregated result in the same file.
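   
   For illustration, a minimal sketch of what that could look like, reusing `str_agg` and `str` from the test body above (the single output path and the section headers are made up):
   
   ```scala
   import java.nio.charset.StandardCharsets
   import java.nio.file.{Files, Paths}
   
   // Write the aggregated summary and the raw per-function results into one
   // file instead of two; "coveragePath" and the headers are illustrative only.
   val coveragePath = "doc/spark_builtin_expr_coverage.txt"
   val combined = s"== Aggregated summary ==\n$str_agg\n== Per-function results ==\n$str"
   Files.write(Paths.get(coveragePath), combined.getBytes(StandardCharsets.UTF_8))
   ```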
   
   
   



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]
