Github user bersprockets commented on a diff in the pull request: https://github.com/apache/spark/pull/21073#discussion_r186570491 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -116,6 +117,169 @@ case class MapValues(child: Expression) override def prettyName: String = "map_values" } +/** + * Returns the union of all the given maps. + */ +@ExpressionDescription( +usage = "_FUNC_(map, ...) - Returns the union of all the given maps", +examples = """ + Examples: + > SELECT _FUNC_(map(1, 'a', 2, 'b'), map(2, 'c', 3, 'd')); + [[1 -> "a"], [2 -> "c"], [3 -> "d"]] + """, since = "2.4.0") +case class MapConcat(children: Seq[Expression]) extends Expression { + + override def checkInputDataTypes(): TypeCheckResult = { + // check key types and value types separately to allow valueContainsNull to vary + if (children.exists(!_.dataType.isInstanceOf[MapType])) { + TypeCheckResult.TypeCheckFailure( + s"The given input of function $prettyName should all be of type map, " + + "but they are " + children.map(_.dataType.simpleString).mkString("[", ", ", "]")) + } else if (children.map(_.dataType.asInstanceOf[MapType].keyType) + .exists(_.isInstanceOf[MapType])) { + // map_concat needs to pick a winner when multiple maps contain the same key. map_concat + // can do that only if it can detect when two keys are the same. SPARK-9415 states "map type + // should not support equality, hash". 
As a result, map_concat does not support a map type + // as a key + TypeCheckResult.TypeCheckFailure( + s"The given input maps of function $prettyName cannot have a map type as a key") + } else if (children.map(_.dataType.asInstanceOf[MapType].keyType).distinct.length > 1) { + TypeCheckResult.TypeCheckFailure( + s"The given input maps of function $prettyName should all be the same type, " + + "but they are " + children.map(_.dataType.simpleString).mkString("[", ", ", "]")) + } else if (children.map(_.dataType.asInstanceOf[MapType].valueType).distinct.length > 1) { + TypeCheckResult.TypeCheckFailure( + s"The given input maps of function $prettyName should all be the same type, " + + "but they are " + children.map(_.dataType.simpleString).mkString("[", ", ", "]")) + } else { + TypeCheckResult.TypeCheckSuccess + } + } + + override def dataType: MapType = { + MapType( + keyType = children.headOption + .map(_.dataType.asInstanceOf[MapType].keyType).getOrElse(StringType), + valueType = children.headOption + .map(_.dataType.asInstanceOf[MapType].valueType).getOrElse(StringType), + valueContainsNull = children.map { c => + c.dataType.asInstanceOf[MapType] + }.exists(_.valueContainsNull) + ) + } + + override def nullable: Boolean = children.exists(_.nullable) + + override def eval(input: InternalRow): Any = { + val union = new util.LinkedHashMap[Any, Any]() + children.map(_.eval(input)).foreach { raw => + if (raw == null) { + return null + } + val map = raw.asInstanceOf[MapData] + map.foreach(dataType.keyType, dataType.valueType, (k, v) => + union.put(k, v) + ) + } + val (keyArray, valueArray) = union.entrySet().toArray().map { e => --- End diff -- I would imagine bad things would happen before you got this far (even Map's size method returns an Int).
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org