Github user mgaido91 commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21986#discussion_r207816072
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala
 ---
    @@ -210,3 +221,66 @@ case class ArrayTransform(
     
       override def prettyName: String = "transform"
     }
    +
    +/**
    + * Filters entries in a map using the provided function.
    + */
    +@ExpressionDescription(
    +usage = "_FUNC_(expr, func) - Filters entries in a map using the 
function.",
    +examples = """
    +    Examples:
    +      > SELECT _FUNC_(map(1, 0, 2, 2, 3, -1), (k, v) -> k > v);
    +       [1 -> 0, 3 -> -1]
    +  """,
    +since = "2.4.0")
    +case class MapFilter(
    +    input: Expression,
    +    function: Expression)
    +  extends MapBasedUnaryHigherOrderFunction with CodegenFallback {
    +
    +  @transient val (keyType, valueType, valueContainsNull) = input.dataType 
match {
    +    case MapType(kType, vType, vContainsNull) => (kType, vType, 
vContainsNull)
    +    case _ =>
    +      val MapType(kType, vType, vContainsNull) = 
MapType.defaultConcreteType
    +      (kType, vType, vContainsNull)
    +  }
    +
    +  @transient lazy val (keyVar, valueVar) = {
    +    val args = function.asInstanceOf[LambdaFunction].arguments
    +    (args.head.asInstanceOf[NamedLambdaVariable], 
args.tail.head.asInstanceOf[NamedLambdaVariable])
    +  }
    +
    +  override def bind(f: (Expression, Seq[(DataType, Boolean)]) => 
LambdaFunction): MapFilter = {
    +    function match {
    +      case LambdaFunction(_, _, _) =>
    +        copy(function = f(function, (keyType, false) :: (valueType, 
valueContainsNull) :: Nil))
    +    }
    +  }
    +
    +  override def nullable: Boolean = input.nullable
    +
    +  override def eval(input: InternalRow): Any = {
    +    val m = this.input.eval(input).asInstanceOf[MapData]
    +    if (m == null) {
    +      null
    +    } else {
    +      val retKeys = new mutable.ListBuffer[Any]
    +      val retValues = new mutable.ListBuffer[Any]
    --- End diff --
    
    But I just checked that in `ArrayFilter` you initialized it with the number 
of incoming elements. So I think there is no difference in terms of 
performance: by using an upper bound on the number of output elements, we can 
be sure that no copy is performed.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to