Github user mgaido91 commented on a diff in the pull request:

https://github.com/apache/spark/pull/21986#discussion_r207816072

--- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala ---
@@ -210,3 +221,66 @@ case class ArrayTransform(
 
   override def prettyName: String = "transform"
 }
+
+/**
+ * Filters entries in a map using the provided function.
+ */
+@ExpressionDescription(
+usage = "_FUNC_(expr, func) - Filters entries in a map using the function.",
+examples = """
+    Examples:
+      > SELECT _FUNC_(map(1, 0, 2, 2, 3, -1), (k, v) -> k > v);
+       [1 -> 0, 3 -> -1]
+  """,
+since = "2.4.0")
+case class MapFilter(
+    input: Expression,
+    function: Expression)
+  extends MapBasedUnaryHigherOrderFunction with CodegenFallback {
+
+  @transient val (keyType, valueType, valueContainsNull) = input.dataType match {
+    case MapType(kType, vType, vContainsNull) => (kType, vType, vContainsNull)
+    case _ =>
+      val MapType(kType, vType, vContainsNull) = MapType.defaultConcreteType
+      (kType, vType, vContainsNull)
+  }
+
+  @transient lazy val (keyVar, valueVar) = {
+    val args = function.asInstanceOf[LambdaFunction].arguments
+    (args.head.asInstanceOf[NamedLambdaVariable], args.tail.head.asInstanceOf[NamedLambdaVariable])
+  }
+
+  override def bind(f: (Expression, Seq[(DataType, Boolean)]) => LambdaFunction): MapFilter = {
+    function match {
+      case LambdaFunction(_, _, _) =>
+        copy(function = f(function, (keyType, false) :: (valueType, valueContainsNull) :: Nil))
+    }
+  }
+
+  override def nullable: Boolean = input.nullable
+
+  override def eval(input: InternalRow): Any = {
+    val m = this.input.eval(input).asInstanceOf[MapData]
+    if (m == null) {
+      null
+    } else {
+      val retKeys = new mutable.ListBuffer[Any]
+      val retValues = new mutable.ListBuffer[Any]
--- End diff --

But I just checked that in `ArrayFilter` you initialize it with the number of incoming elements. So I think there is no difference in terms of performance: since the input size is an upper bound on the number of output elements, we are sure no copy is performed.
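
To make the pre-sizing point concrete, here is a minimal standalone sketch (hypothetical code, not taken from the PR; the helper name and signature are made up for illustration). Because a filter can only drop elements, the input length is an upper bound on the output length, so giving the buffer that capacity up front means its backing array never has to grow and be copied while appending:

```scala
import scala.collection.mutable

// Hypothetical sketch: pre-size the output buffer to the input length,
// an upper bound on how many elements a filter can produce, so the
// ArrayBuffer's backing array is never reallocated and copied on append.
def filterWithPresizedBuffer[A](input: Array[A], pred: A => Boolean): Seq[A] = {
  val out = new mutable.ArrayBuffer[A](input.length) // capacity = upper bound
  var i = 0
  while (i < input.length) {
    if (pred(input(i))) out += input(i)
    i += 1
  }
  out
}
```

A `ListBuffer`, by contrast, is a linked list and never copies on append, which is why the two choices end up comparable here.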