This is an automated email from the ASF dual-hosted git repository.
agrove pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/datafusion-comet.git
The following commit(s) were added to refs/heads/main by this push:
new 6d5bdbe2d chore: Refactor serde for `ArrayCompact` and `ArrayFilter`
(#2536)
6d5bdbe2d is described below
commit 6d5bdbe2d08ed0e2488a1f8406be15f057ae68d3
Author: Andy Grove <[email protected]>
AuthorDate: Mon Oct 6 13:57:05 2025 -0600
chore: Refactor serde for `ArrayCompact` and `ArrayFilter` (#2536)
---
docs/source/user-guide/latest/expressions.md | 7 ++++---
.../scala/org/apache/comet/serde/QueryPlanSerde.scala | 5 ++---
.../main/scala/org/apache/comet/serde/arrays.scala | 19 ++++++++++++++++++-
3 files changed, 24 insertions(+), 7 deletions(-)
diff --git a/docs/source/user-guide/latest/expressions.md
b/docs/source/user-guide/latest/expressions.md
index 4dbcec6f0..e727642dc 100644
--- a/docs/source/user-guide/latest/expressions.md
+++ b/docs/source/user-guide/latest/expressions.md
@@ -61,7 +61,7 @@ incompatible expressions.
## String Functions
| Expression | Spark-Compatible? | Compatibility Notes
|
-|-----------------| ----------------- |
----------------------------------------------------------------------------------------------------------
|
+| --------------- | ----------------- |
----------------------------------------------------------------------------------------------------------
|
| Ascii | Yes |
|
| BitLength | Yes |
|
| Chr | Yes |
|
@@ -93,7 +93,7 @@ incompatible expressions.
## Date/Time Functions
| Expression | SQL | Spark-Compatible? |
Compatibility Notes
|
-|----------------|------------------------------| -----------------
|----------------------------------------------------------------------------------------------------------------------|
+| -------------- | ---------------------------- | ----------------- |
--------------------------------------------------------------------------------------------------------------------
|
| DateAdd | `date_add` | Yes |
|
| DateSub | `date_sub` | Yes |
|
| DatePart | `date_part(field, source)` | Yes |
Supported values of `field`:
`year`/`month`/`week`/`day`/`dayofweek`/`dayofweek_iso`/`doy`/`quarter`/`hour`/`minute`
|
@@ -116,7 +116,7 @@ incompatible expressions.
## Math Expressions
| Expression | SQL | Spark-Compatible? | Compatibility Notes
|
-|----------------|-----------|-------------------|-----------------------------------|
+| -------------- | --------- | ----------------- |
--------------------------------- |
| Acos | `acos` | Yes |
|
| Add | `+` | Yes |
|
| Asin | `asin` | Yes |
|
@@ -208,6 +208,7 @@ incompatible expressions.
| ArrayContains | Yes |
|
| ArrayDistinct | No | Behaves differently from Spark. Comet
first sorts, then removes duplicates, while Spark preserves the original order.
|
| ArrayExcept | No |
|
+| ArrayFilter | Yes | Only supports the case where the function is
`IsNotNull`
|
| ArrayInsert | No |
|
| ArrayIntersect | No |
|
| ArrayJoin | No |
|
diff --git a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
index 4d1daacd6..8fc7c2d63 100644
--- a/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/QueryPlanSerde.scala
@@ -69,10 +69,11 @@ object QueryPlanSerde extends Logging with CometExprShim {
private val arrayExpressions: Map[Class[_ <: Expression],
CometExpressionSerde[_]] = Map(
classOf[ArrayAppend] -> CometArrayAppend,
- // TODO ArrayCompact
+ classOf[ArrayCompact] -> CometArrayCompact,
classOf[ArrayContains] -> CometArrayContains,
classOf[ArrayDistinct] -> CometArrayDistinct,
classOf[ArrayExcept] -> CometArrayExcept,
+ classOf[ArrayFilter] -> CometArrayFilter,
classOf[ArrayInsert] -> CometArrayInsert,
classOf[ArrayIntersect] -> CometArrayIntersect,
classOf[ArrayJoin] -> CometArrayJoin,
@@ -911,8 +912,6 @@ object QueryPlanSerde extends Logging with CometExprShim {
withInfo(expr, bloomFilter, value)
None
}
- case af @ ArrayFilter(_, func) if
func.children.head.isInstanceOf[IsNotNull] =>
- convert(af, CometArrayCompact)
case l @ Length(child) if child.dataType == BinaryType =>
withInfo(l, "Length on BinaryType is not supported")
None
diff --git a/spark/src/main/scala/org/apache/comet/serde/arrays.scala
b/spark/src/main/scala/org/apache/comet/serde/arrays.scala
index 09ea547cc..334f8ee8b 100644
--- a/spark/src/main/scala/org/apache/comet/serde/arrays.scala
+++ b/spark/src/main/scala/org/apache/comet/serde/arrays.scala
@@ -21,7 +21,7 @@ package org.apache.comet.serde
import scala.annotation.tailrec
-import org.apache.spark.sql.catalyst.expressions.{ArrayAppend, ArrayContains,
ArrayDistinct, ArrayExcept, ArrayInsert, ArrayIntersect, ArrayJoin, ArrayMax,
ArrayMin, ArrayRemove, ArrayRepeat, ArraysOverlap, ArrayUnion, Attribute,
CreateArray, ElementAt, Expression, Flatten, GetArrayItem, Literal, Reverse}
+import org.apache.spark.sql.catalyst.expressions.{ArrayAppend, ArrayContains,
ArrayDistinct, ArrayExcept, ArrayFilter, ArrayInsert, ArrayIntersect,
ArrayJoin, ArrayMax, ArrayMin, ArrayRemove, ArrayRepeat, ArraysOverlap,
ArrayUnion, Attribute, CreateArray, ElementAt, Expression, Flatten,
GetArrayItem, IsNotNull, Literal, Reverse}
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
@@ -505,6 +505,23 @@ object CometFlatten extends CometExpressionSerde[Flatten]
with ArraysBase {
}
}
+object CometArrayFilter extends CometExpressionSerde[ArrayFilter] {
+
+ override def getSupportLevel(expr: ArrayFilter): SupportLevel = {
+ expr.function.children.headOption match {
+ case Some(_: IsNotNull) => Compatible()
+ case _ => Unsupported()
+ }
+ }
+
+ override def convert(
+ expr: ArrayFilter,
+ inputs: Seq[Attribute],
+ binding: Boolean): Option[ExprOuterClass.Expr] = {
+ CometArrayCompact.convert(expr, inputs, binding)
+ }
+}
+
trait ArraysBase {
def isTypeSupported(dt: DataType): Boolean = {
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]