This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 84dca9fad feat: Support more date part expressions (#2316)
84dca9fad is described below
commit 84dca9fad1163c3a81f47b42b5a490db119953b6
Author: Zhen Wang <[email protected]>
AuthorDate: Wed Sep 17 00:27:31 2025 +0800
feat: Support more date part expressions (#2316)
---
docs/source/user-guide/latest/expressions.md | 32 +++--
.../org/apache/comet/serde/QueryPlanSerde.scala | 11 +-
.../scala/org/apache/comet/serde/datetime.scala | 132 ++++++++++++++++++++-
.../org/apache/comet/CometExpressionSuite.scala | 9 +-
4 files changed, 162 insertions(+), 22 deletions(-)
diff --git a/docs/source/user-guide/latest/expressions.md
b/docs/source/user-guide/latest/expressions.md
index 5a459866e..c29d0202b 100644
--- a/docs/source/user-guide/latest/expressions.md
+++ b/docs/source/user-guide/latest/expressions.md
@@ -91,19 +91,25 @@ incompatible expressions.
## Date/Time Functions
-| Expression | SQL | Spark-Compatible? |
Compatibility Notes |
-| -------------- | ---------------------------- | ----------------- |
----------------------------------------------------------------------------- |
-| DateAdd | `date_add` | Yes |
|
-| DateSub | `date_sub` | Yes |
|
-| DatePart | `date_part(field, source)` | Yes | Only
`year` is supported |
-| Extract | `extract(field FROM source)` | Yes | Only
`year` is supported |
-| FromUnixTime | `from_unixtime` | No | Does not
support format, supports only -8334601211038 <= sec <= 8210266876799 |
-| Hour | `hour` | Yes |
|
-| Minute | `minute` | Yes |
|
-| Second | `second` | Yes |
|
-| TruncDate | `trunc` | Yes |
|
-| TruncTimestamp | `trunc_date` | Yes |
|
-| Year | `year` | Yes |
|
+| Expression | SQL | Spark-Compatible? |
Compatibility Notes
|
+| -------------- |------------------------------| -----------------
|------------------------------------------------------------------------------------------------------|
+| DateAdd | `date_add` | Yes |
|
+| DateSub | `date_sub` | Yes |
|
+| DatePart | `date_part(field, source)` | Yes |
Supported values of `field`:
`year`/`month`/`week`/`day`/`dayofweek`/`doy`/`quarter`/`hour`/`minute` |
+| Extract | `extract(field FROM source)` | Yes |
Supported values of `field`:
`year`/`month`/`week`/`day`/`dayofweek`/`doy`/`quarter`/`hour`/`minute` |
+| FromUnixTime | `from_unixtime` | No | Does not
support format, supports only -8334601211038 <= sec <= 8210266876799
|
+| Hour | `hour` | Yes |
|
+| Minute | `minute` | Yes |
|
+| Second | `second` | Yes |
|
+| TruncDate | `trunc` | Yes |
|
+| TruncTimestamp | `date_trunc` | Yes |
|
+| Year | `year` | Yes |
|
+| Month | `month` | Yes |
|
+| DayOfMonth | `day`/`dayofmonth` | Yes |
|
+| DayOfWeek | `dayofweek` | Yes |
|
+| DayOfYear | `dayofyear` | Yes |
|
+| WeekOfYear | `weekofyear` | Yes |
|
+| Quarter | `quarter` | Yes |
|
## Math Expressions
diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
index 0f3b1bf1f..6fd68569f 100644
--- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
@@ -203,7 +203,16 @@ object QueryPlanSerde extends Logging with CometExprShim {
classOf[Second] -> CometSecond,
classOf[TruncDate] -> CometTruncDate,
classOf[TruncTimestamp] -> CometTruncTimestamp,
- classOf[Year] -> CometYear)
+ classOf[Year] -> CometYear,
+ classOf[Month] -> CometMonth,
+ classOf[DayOfMonth] -> CometDayOfMonth,
+ classOf[DayOfWeek] -> CometDayOfWeek,
+ // FIXME: the current DataFusion version does not support isodow (WeekDay);
+ // see: https://github.com/apache/datafusion-comet/issues/2330
+ // classOf[WeekDay] -> CometWeekDay,
+ classOf[DayOfYear] -> CometDayOfYear,
+ classOf[WeekOfYear] -> CometWeekOfYear,
+ classOf[Quarter] -> CometQuarter)
private val conversionExpressions: Map[Class[_ <: Expression],
CometExpressionSerde[_]] = Map(
classOf[Cast] -> CometCast)
diff --git a/spark/src/main/scala/org/apache/comet/serde/datetime.scala
b/spark/src/main/scala/org/apache/comet/serde/datetime.scala
index 327254e72..8e4c92d70 100644
--- a/spark/src/main/scala/org/apache/comet/serde/datetime.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/datetime.scala
@@ -19,19 +19,43 @@
package org.apache.comet.serde
-import org.apache.spark.sql.catalyst.expressions.{Attribute, DateAdd, DateSub,
Hour, Literal, Minute, Second, TruncDate, TruncTimestamp, Year}
+import org.apache.spark.sql.catalyst.expressions.{Attribute, DateAdd, DateSub,
DayOfMonth, DayOfWeek, DayOfYear, GetDateField, Hour, Literal, Minute, Month,
Quarter, Second, TruncDate, TruncTimestamp, WeekDay, WeekOfYear, Year}
import org.apache.spark.sql.types.{DateType, IntegerType}
import org.apache.comet.CometSparkSessionExtensions.withInfo
+import org.apache.comet.serde.CometGetDateField.CometGetDateField
import org.apache.comet.serde.ExprOuterClass.Expr
import org.apache.comet.serde.QueryPlanSerde.{exprToProtoInternal,
optExprWithInfo, scalarFunctionExprToProto,
scalarFunctionExprToProtoWithReturnType, serializeDataType}
-object CometYear extends CometExpressionSerde[Year] {
- override def convert(
- expr: Year,
+private object CometGetDateField extends Enumeration {
+ type CometGetDateField = Value
+
+ // See:
https://datafusion.apache.org/user-guide/sql/scalar_functions.html#date-part
+ val Year: Value = Value("year")
+ val Month: Value = Value("month")
+ val DayOfMonth: Value = Value("day")
+ // DataFusion's `dow` counts Sunday as 0, whereas Spark counts Sunday as 1 (1 =
Sunday,
+ // 2 = Monday, ..., 7 = Saturday).
+ val DayOfWeek: Value = Value("dow")
+ val DayOfYear: Value = Value("doy")
+ val WeekDay: Value = Value("isodow") // day of the week where Monday is 0
+ val WeekOfYear: Value = Value("week")
+ val Quarter: Value = Value("quarter")
+}
+
+/**
+ * Converts Spark [[org.apache.spark.sql.catalyst.expressions.GetDateField]]
expressions to
+ * the DataFusion
+ *
[[https://datafusion.apache.org/user-guide/sql/scalar_functions.html#date-part
datepart]]
+ * function.
+ */
+trait CometExprGetDateField[T <: GetDateField] {
+ def getDateField(
+ expr: T,
+ field: CometGetDateField,
inputs: Seq[Attribute],
binding: Boolean): Option[ExprOuterClass.Expr] = {
- val periodType = exprToProtoInternal(Literal("year"), inputs, binding)
+ val periodType = exprToProtoInternal(Literal(field.toString), inputs,
binding)
val childExpr = exprToProtoInternal(expr.child, inputs, binding)
val optExpr = scalarFunctionExprToProto("datepart", Seq(periodType,
childExpr): _*)
.map(e => {
@@ -51,6 +75,104 @@ object CometYear extends CometExpressionSerde[Year] {
}
}
+object CometYear extends CometExpressionSerde[Year] with
CometExprGetDateField[Year] {
+ override def convert(
+ expr: Year,
+ inputs: Seq[Attribute],
+ binding: Boolean): Option[ExprOuterClass.Expr] = {
+ getDateField(expr, CometGetDateField.Year, inputs, binding)
+ }
+}
+
+object CometMonth extends CometExpressionSerde[Month] with
CometExprGetDateField[Month] {
+ override def convert(
+ expr: Month,
+ inputs: Seq[Attribute],
+ binding: Boolean): Option[ExprOuterClass.Expr] = {
+ getDateField(expr, CometGetDateField.Month, inputs, binding)
+ }
+}
+
+object CometDayOfMonth
+ extends CometExpressionSerde[DayOfMonth]
+ with CometExprGetDateField[DayOfMonth] {
+ override def convert(
+ expr: DayOfMonth,
+ inputs: Seq[Attribute],
+ binding: Boolean): Option[ExprOuterClass.Expr] = {
+ getDateField(expr, CometGetDateField.DayOfMonth, inputs, binding)
+ }
+}
+
+object CometDayOfWeek
+ extends CometExpressionSerde[DayOfWeek]
+ with CometExprGetDateField[DayOfWeek] {
+ override def convert(
+ expr: DayOfWeek,
+ inputs: Seq[Attribute],
+ binding: Boolean): Option[ExprOuterClass.Expr] = {
+ // DataFusion's `dow` counts Sunday as 0, whereas Spark counts Sunday as 1 (1
= Sunday,
+ // 2 = Monday, ..., 7 = Saturday), so we add 1 to the result of
datepart(dow, ...).
+ val optExpr = getDateField(expr, CometGetDateField.DayOfWeek, inputs,
binding)
+ .zip(exprToProtoInternal(Literal(1), inputs, binding))
+ .map { case (left, right) =>
+ Expr
+ .newBuilder()
+ .setAdd(
+ ExprOuterClass.MathExpr
+ .newBuilder()
+ .setLeft(left)
+ .setRight(right)
+ .setEvalMode(ExprOuterClass.EvalMode.LEGACY)
+ .setReturnType(serializeDataType(IntegerType).get)
+ .build())
+ .build()
+ }
+ .headOption
+ optExprWithInfo(optExpr, expr, expr.child)
+ }
+}
+
+object CometWeekDay extends CometExpressionSerde[WeekDay] with
CometExprGetDateField[WeekDay] {
+ override def convert(
+ expr: WeekDay,
+ inputs: Seq[Attribute],
+ binding: Boolean): Option[ExprOuterClass.Expr] = {
+ getDateField(expr, CometGetDateField.WeekDay, inputs, binding)
+ }
+}
+
+object CometDayOfYear
+ extends CometExpressionSerde[DayOfYear]
+ with CometExprGetDateField[DayOfYear] {
+ override def convert(
+ expr: DayOfYear,
+ inputs: Seq[Attribute],
+ binding: Boolean): Option[ExprOuterClass.Expr] = {
+ getDateField(expr, CometGetDateField.DayOfYear, inputs, binding)
+ }
+}
+
+object CometWeekOfYear
+ extends CometExpressionSerde[WeekOfYear]
+ with CometExprGetDateField[WeekOfYear] {
+ override def convert(
+ expr: WeekOfYear,
+ inputs: Seq[Attribute],
+ binding: Boolean): Option[ExprOuterClass.Expr] = {
+ getDateField(expr, CometGetDateField.WeekOfYear, inputs, binding)
+ }
+}
+
+object CometQuarter extends CometExpressionSerde[Quarter] with
CometExprGetDateField[Quarter] {
+ override def convert(
+ expr: Quarter,
+ inputs: Seq[Attribute],
+ binding: Boolean): Option[ExprOuterClass.Expr] = {
+ getDateField(expr, CometGetDateField.Quarter, inputs, binding)
+ }
+}
+
object CometHour extends CometExpressionSerde[Hour] {
override def convert(
expr: Hour,
diff --git a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
index ee22c9b97..0e1d4fc24 100644
--- a/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
+++ b/spark/src/test/scala/org/apache/comet/CometExpressionSuite.scala
@@ -1715,14 +1715,17 @@ class CometExpressionSuite extends CometTestBase with
AdaptiveSparkPlanHelper {
}
}
- test("Year") {
+ test("DatePart functions:
Year/Month/DayOfMonth/DayOfWeek/DayOfYear/WeekOfYear/Quarter") {
Seq(false, true).foreach { dictionary =>
withSQLConf("parquet.enable.dictionary" -> dictionary.toString) {
val table = "test"
withTable(table) {
sql(s"create table $table(col timestamp) using parquet")
- sql(s"insert into $table values (now()), (null)")
- checkSparkAnswerAndOperator(s"SELECT year(col) FROM $table")
+ sql(s"insert into $table values (now()), (timestamp('1900-01-01')),
(null)")
+ // TODO: weekday(col)
https://github.com/apache/datafusion-comet/issues/2330
+ checkSparkAnswerAndOperator(
+ "SELECT col, year(col), month(col), day(col)," +
+ s" dayofweek(col), dayofyear(col), weekofyear(col), quarter(col)
FROM $table")
}
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]