This is an automated email from the ASF dual-hosted git repository.

agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git


The following commit(s) were added to refs/heads/main by this push:
     new dca45ea92 chore: Add microbenchmark for casting string to temporal types (#2980)
dca45ea92 is described below

commit dca45ea928f016cf7d496335b2228f3747f8480d
Author: Andy Grove <[email protected]>
AuthorDate: Fri Dec 26 07:42:51 2025 -0700

    chore: Add microbenchmark for casting string to temporal types (#2980)
---
 .../CometCastStringToTemporalBenchmark.scala       | 101 +++++++++++++++++++++
 1 file changed, 101 insertions(+)

diff --git a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastStringToTemporalBenchmark.scala b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastStringToTemporalBenchmark.scala
new file mode 100644
index 000000000..39337be5c
--- /dev/null
+++ b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastStringToTemporalBenchmark.scala
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.benchmark
+
+case class CastStringToTemporalConfig(
+    name: String,
+    query: String,
+    extraCometConfigs: Map[String, String] = Map.empty)
+
+// spotless:off
+/**
+ * Benchmark to measure performance of Comet cast from String to temporal types. To run this
+ * benchmark:
+ * `SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometCastStringToTemporalBenchmark`
+ * Results will be written to "spark/benchmarks/CometCastStringToTemporalBenchmark-**results.txt".
+ */
+// spotless:on
+object CometCastStringToTemporalBenchmark extends CometBenchmarkBase {
+
+  // Configuration for String to temporal cast benchmarks
+  private val dateCastConfigs = List(
+    CastStringToTemporalConfig(
+      "Cast String to Date",
+      "SELECT CAST(c1 AS DATE) FROM parquetV1Table"),
+    CastStringToTemporalConfig(
+      "Try_Cast String to Date",
+      "SELECT TRY_CAST(c1 AS DATE) FROM parquetV1Table"))
+
+  private val timestampCastConfigs = List(
+    CastStringToTemporalConfig(
+      "Cast String to Timestamp",
+      "SELECT CAST(c1 AS TIMESTAMP) FROM parquetV1Table"),
+    CastStringToTemporalConfig(
+      "Try_Cast String to Timestamp",
+      "SELECT TRY_CAST(c1 AS TIMESTAMP) FROM parquetV1Table"))
+
+  override def runCometBenchmark(mainArgs: Array[String]): Unit = {
+    val values = 1024 * 1024 * 10 // 10M rows
+
+    // Generate date data once with ~10% invalid values
+    runBenchmarkWithTable("date data generation", values) { v =>
+      withTempPath { dateDir =>
+        withTempTable("parquetV1Table") {
+          prepareTable(
+            dateDir,
+            spark.sql(s"""
+              SELECT CASE
+                WHEN value % 10 = 0 THEN 'invalid-date'
+                ELSE CAST(DATE_ADD('2020-01-01', CAST(value % 3650 AS INT)) AS STRING)
+              END AS c1
+              FROM $tbl
+            """))
+
+          // Run date cast benchmarks with the same data
+          dateCastConfigs.foreach { config =>
+            runExpressionBenchmark(config.name, v, config.query, config.extraCometConfigs)
+          }
+        }
+      }
+    }
+
+    // Generate timestamp data once with ~10% invalid values
+    runBenchmarkWithTable("timestamp data generation", values) { v =>
+      withTempPath { timestampDir =>
+        withTempTable("parquetV1Table") {
+          prepareTable(
+            timestampDir,
+            spark.sql(s"""
+              SELECT CASE
+                WHEN value % 10 = 0 THEN 'not-a-timestamp'
+                ELSE CAST(TIMESTAMP_MICROS(value % 9999999999) AS STRING)
+              END AS c1
+              FROM $tbl
+            """))
+
+          // Run timestamp cast benchmarks with the same data
+          timestampCastConfigs.foreach { config =>
+            runExpressionBenchmark(config.name, v, config.query, config.extraCometConfigs)
+          }
+        }
+      }
+    }
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to