This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new 937619f48 chore: Add microbenchmark for casting string to numeric 
(#2979)
937619f48 is described below

commit 937619f4893be7a86071434315bae9d56d716bbe
Author: Andy Grove <[email protected]>
AuthorDate: Fri Jan 2 09:39:13 2026 -0700

    chore: Add microbenchmark for casting string to numeric (#2979)
---
 .../CometCastStringToNumericBenchmark.scala        | 95 ++++++++++++++++++++++
 1 file changed, 95 insertions(+)

diff --git 
a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastStringToNumericBenchmark.scala
 
b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastStringToNumericBenchmark.scala
new file mode 100644
index 000000000..7f210fc73
--- /dev/null
+++ 
b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastStringToNumericBenchmark.scala
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.benchmark
+
+import org.apache.spark.sql.catalyst.expressions.Cast
+import org.apache.spark.sql.internal.SQLConf
+
+import org.apache.comet.CometConf
+
+case class CastStringToNumericConfig(
+    name: String,
+    query: String,
+    extraCometConfigs: Map[String, String] = Map.empty)
+
+/**
+ * Benchmark to measure performance of Comet cast from String to numeric 
types. To run this
+ * benchmark:
+ * {{{
+ *   SPARK_GENERATE_BENCHMARK_FILES=1 make 
benchmark-org.apache.spark.sql.benchmark.CometCastStringToNumericBenchmark
+ * }}}
+ */
+object CometCastStringToNumericBenchmark extends CometBenchmarkBase {
+
+  private val castFunctions = Seq("CAST", "TRY_CAST")
+  private val targetTypes =
+    Seq(
+      "BOOLEAN",
+      "BYTE",
+      "SHORT",
+      "INT",
+      "LONG",
+      "FLOAT",
+      "DOUBLE",
+      "DECIMAL(10,2)",
+      "DECIMAL(38,19)")
+
+  private val castConfigs = for {
+    castFunc <- castFunctions
+    targetType <- targetTypes
+  } yield CastStringToNumericConfig(
+    s"$castFunc String to $targetType",
+    s"SELECT $castFunc(c1 AS $targetType) FROM parquetV1Table",
+    Map(
+      SQLConf.ANSI_ENABLED.key -> "false",
+      CometConf.getExprAllowIncompatConfigKey(classOf[Cast]) -> "true"))
+
+  override def runCometBenchmark(mainArgs: Array[String]): Unit = {
+    val values = 1024 * 1024 // 1M rows
+
+    // Generate input data once for all benchmarks
+    runBenchmarkWithTable("String to numeric casts", values) { v =>
+      withTempPath { dir =>
+        withTempTable("parquetV1Table") {
+          // Generate numeric strings with both integer and decimal values
+          // Also include some special values: nulls (~2%), NaN (~2%), 
Infinity (~2%)
+          prepareTable(
+            dir,
+            spark.sql(s"""
+              SELECT CASE
+                WHEN value % 50 = 0 THEN NULL
+                WHEN value % 50 = 1 THEN 'NaN'
+                WHEN value % 50 = 2 THEN 'Infinity'
+                WHEN value % 50 = 3 THEN '-Infinity'
+                WHEN value % 50 < 10 THEN CAST(value % 99 AS STRING)
+                WHEN value % 50 < 30 THEN CAST(value % 999999 AS STRING)
+                ELSE CAST((value - 500000) / 100.0 AS STRING)
+              END AS c1
+              FROM $tbl
+            """))
+
+          castConfigs.foreach { config =>
+            runExpressionBenchmark(config.name, v, config.query, 
config.extraCometConfigs)
+          }
+        }
+      }
+    }
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to