Github user cloud-fan commented on a diff in the pull request:

    https://github.com/apache/spark/pull/23124#discussion_r235849825
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
 ---
    @@ -751,171 +739,46 @@ case class MapFromEntries(child: Expression) extends 
UnaryExpression {
           s"${child.dataType.catalogString} type. $prettyName accepts only 
arrays of pair structs.")
       }
     
    +  private lazy val mapBuilder = new ArrayBasedMapBuilder(dataType.keyType, 
dataType.valueType)
    +
       override protected def nullSafeEval(input: Any): Any = {
    -    val arrayData = input.asInstanceOf[ArrayData]
    -    val numEntries = arrayData.numElements()
    +    val entries = input.asInstanceOf[ArrayData]
    +    val numEntries = entries.numElements()
         var i = 0
    -    if(nullEntries) {
    +    if (nullEntries) {
           while (i < numEntries) {
    -        if (arrayData.isNullAt(i)) return null
    +        if (entries.isNullAt(i)) return null
             i += 1
           }
         }
    -    val keyArray = new Array[AnyRef](numEntries)
    -    val valueArray = new Array[AnyRef](numEntries)
    +
    +    mapBuilder.reset()
         i = 0
         while (i < numEntries) {
    -      val entry = arrayData.getStruct(i, 2)
    -      val key = entry.get(0, dataType.keyType)
    -      if (key == null) {
    -        throw new RuntimeException("The first field from a struct (key) 
can't be null.")
    -      }
    -      keyArray.update(i, key)
    -      val value = entry.get(1, dataType.valueType)
    -      valueArray.update(i, value)
    +      mapBuilder.put(entries.getStruct(i, 2))
           i += 1
         }
    -    ArrayBasedMapData(keyArray, valueArray)
    +    mapBuilder.build()
       }
     
       override protected def doGenCode(ctx: CodegenContext, ev: ExprCode): 
ExprCode = {
         nullSafeCodeGen(ctx, ev, c => {
           val numEntries = ctx.freshName("numEntries")
    -      val isKeyPrimitive = CodeGenerator.isPrimitiveType(dataType.keyType)
    -      val isValuePrimitive = 
CodeGenerator.isPrimitiveType(dataType.valueType)
    -      val code = if (isKeyPrimitive && isValuePrimitive) {
    -        genCodeForPrimitiveElements(ctx, c, ev.value, numEntries)
    --- End diff --
    
    Since we need to check for duplicate map keys, it's no longer possible to apply this 
trick, as we need to overwrite the value if the key has already appeared.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to