This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new dca45ea92 chore: Add microbenchmark for casting string to temporal
types (#2980)
dca45ea92 is described below
commit dca45ea928f016cf7d496335b2228f3747f8480d
Author: Andy Grove <[email protected]>
AuthorDate: Fri Dec 26 07:42:51 2025 -0700
chore: Add microbenchmark for casting string to temporal types (#2980)
---
.../CometCastStringToTemporalBenchmark.scala | 101 +++++++++++++++++++++
1 file changed, 101 insertions(+)
diff --git
a/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastStringToTemporalBenchmark.scala
b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastStringToTemporalBenchmark.scala
new file mode 100644
index 000000000..39337be5c
--- /dev/null
+++
b/spark/src/test/scala/org/apache/spark/sql/benchmark/CometCastStringToTemporalBenchmark.scala
@@ -0,0 +1,101 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.spark.sql.benchmark
+
+case class CastStringToTemporalConfig( // One benchmark case: a label, the SQL to run, and optional extra settings
+ name: String, // label passed as the first argument to runExpressionBenchmark
+ query: String, // SQL text passed to runExpressionBenchmark; the configs in this file select from parquetV1Table
+ extraCometConfigs: Map[String, String] = Map.empty) // forwarded to runExpressionBenchmark; empty unless a case overrides it
+
+// spotless:off
+/**
+ * Benchmark to measure performance of Comet cast from String to temporal types. To run this
+ * benchmark:
+ * `SPARK_GENERATE_BENCHMARK_FILES=1 make benchmark-org.apache.spark.sql.benchmark.CometCastStringToTemporalBenchmark`
+ * Results will be written to "spark/benchmarks/CometCastStringToTemporalBenchmark-**results.txt".
+ */
+// spotless:on
+object CometCastStringToTemporalBenchmark extends CometBenchmarkBase {
+
+ // Date cases: CAST and TRY_CAST of string column c1 to DATE, both reading parquetV1Table
+ private val dateCastConfigs = List(
+ CastStringToTemporalConfig(
+ "Cast String to Date",
+ "SELECT CAST(c1 AS DATE) FROM parquetV1Table"),
+ CastStringToTemporalConfig(
+ "Try_Cast String to Date",
+ "SELECT TRY_CAST(c1 AS DATE) FROM parquetV1Table"))
+
+ private val timestampCastConfigs = List( // Timestamp cases: CAST and TRY_CAST of c1 to TIMESTAMP
+ CastStringToTemporalConfig(
+ "Cast String to Timestamp",
+ "SELECT CAST(c1 AS TIMESTAMP) FROM parquetV1Table"),
+ CastStringToTemporalConfig(
+ "Try_Cast String to Timestamp",
+ "SELECT TRY_CAST(c1 AS TIMESTAMP) FROM parquetV1Table"))
+
+ override def runCometBenchmark(mainArgs: Array[String]): Unit = {
+ val values = 1024 * 1024 * 10 // 10 * 2^20 = 10,485,760 (~10M); passed to runBenchmarkWithTable for both datasets
+
+ // Date input, built once: rows with value % 10 = 0 get 'invalid-date'; the rest are 2020-01-01 plus (value % 3650) days, as strings
+ runBenchmarkWithTable("date data generation", values) { v =>
+ withTempPath { dateDir =>
+ withTempTable("parquetV1Table") { // same table name the config queries select from
+ prepareTable(
+ dateDir,
+ spark.sql(s"""
+ SELECT CASE
+ WHEN value % 10 = 0 THEN 'invalid-date'
+ ELSE CAST(DATE_ADD('2020-01-01', CAST(value % 3650 AS INT)) AS
STRING)
+ END AS c1
+ FROM $tbl
+ """))
+
+ // Run every date config against the table prepared above, so CAST and TRY_CAST see identical input
+ dateCastConfigs.foreach { config =>
+ runExpressionBenchmark(config.name, v, config.query,
config.extraCometConfigs)
+ }
+ }
+ }
+ }
+
+ // Timestamp input, built once: rows with value % 10 = 0 get 'not-a-timestamp'; the rest are TIMESTAMP_MICROS(value % 9999999999), as strings
+ runBenchmarkWithTable("timestamp data generation", values) { v =>
+ withTempPath { timestampDir =>
+ withTempTable("parquetV1Table") { // table name is reused; the date table's withTempTable scope has already closed
+ prepareTable(
+ timestampDir,
+ spark.sql(s"""
+ SELECT CASE
+ WHEN value % 10 = 0 THEN 'not-a-timestamp'
+ ELSE CAST(TIMESTAMP_MICROS(value % 9999999999) AS STRING)
+ END AS c1
+ FROM $tbl
+ """))
+
+ // Run every timestamp config against the table prepared above, so CAST and TRY_CAST see identical input
+ timestampCastConfigs.foreach { config =>
+ runExpressionBenchmark(config.name, v, config.query,
config.extraCometConfigs)
+ }
+ }
+ }
+ }
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]