msamirkhan commented on a change in pull request #29354:
URL: https://github.com/apache/spark/pull/29354#discussion_r465994643



##########
File path: 
external/avro/src/main/scala/org/apache/spark/sql/avro/SparkAvroDatumReader.scala
##########
@@ -452,71 +452,73 @@ class SparkAvroDatumReader[T](
 
   private[this] def getArrayReader(avroType: Schema,
       elementType: DataType,
-      path: List[String],
-      reuseObj: Boolean
+      path: List[String]
   ): (CatalystDataUpdater, Int, ResolvingDecoder) => Unit = {
     val elementReader = newReader(avroType.getElementType, elementType, path, 
false)
-    val array = new ArrayBuffer[Any]
-    val arrayUpdater = new ArrayBufferUpdater(array)
-    val toArrayConverter = getToArrayDataConverter(elementType, array)
+    val arrayCreator = getArrayDataCreator(elementType)
+    val arrayExpander = getArrayDataExpander(elementType)
 
     (updater, ordinal, in) => {
       var length = in.readArrayStart()
-      array.sizeHint(length.toInt)
+      var array = arrayCreator(length) // if (length == 0) 0 else 1)
+      val arrayUpdater = new ArrayDataUpdater(array)
 
+      var base: Int = 0
       while (length > 0) {
         var i = 0
         while (i < length) {
-          elementReader(arrayUpdater, i, in)
+          elementReader(arrayUpdater, base + i, in)
           i += 1
+          // array = arrayExpander(arrayUpdater, if (i == length) 0 else 1)
         }
+        base += length.toInt
         length = in.arrayNext()
-        array.sizeHint((array.length + length).toInt)
+        array = arrayExpander(arrayUpdater, length) // if (length == 0) 0 else 
1)
       }
 
-      updater.set(ordinal, toArrayConverter())
-      array.clear()
+      updater.set(ordinal, array)
     }
   }
 
   private[this] def getMapReader(avroType: Schema,
       keyType: DataType,
       valueType: DataType,
-      path: List[String],
-      reuseObj: Boolean
+      path: List[String]
   ): (CatalystDataUpdater, Int, ResolvingDecoder) => Unit = {
     val keyReader = newReader(SchemaBuilder.builder().stringType(), 
StringType, path, false)
-    val keyArray = new ArrayBuffer[Any]
-    val keyArrayUpdater = new ArrayBufferUpdater(keyArray)
-    val toKeyArrayConverter = getToArrayDataConverter(keyType, keyArray)
+    val keyArrayCreator = getArrayDataCreator(keyType)
+    val keyArrayExpander = getArrayDataExpander(keyType)
 
     val valueReader = newReader(avroType.getValueType, valueType, path, false)
-    val valueArray = new ArrayBuffer[Any]
-    val valueArrayUpdater = new ArrayBufferUpdater(valueArray)
-    val toValueArrayConverter = getToArrayDataConverter(valueType, valueArray)
+    val valueArrayCreator = getArrayDataCreator(valueType)
+    val valueArrayExpander = getArrayDataExpander(valueType)
 
     (updater, ordinal, in) => {
       var length = in.readMapStart()
-      keyArray.sizeHint(length.toInt)
-      valueArray.sizeHint(length.toInt)
+      var keyArray = keyArrayCreator(length) // if (length == 0) 0 else 1)
+      var valueArray = valueArrayCreator(length) // if (length == 0) 0 else 1)
+      val keyArrayUpdater = new ArrayDataUpdater(keyArray)
+      val valueArrayUpdater = new ArrayDataUpdater(valueArray)
 
+      var base: Int = 0
       while (length > 0) {
         var i = 0
         while (i < length) {
-          keyReader(keyArrayUpdater, i, in)
-          valueReader(valueArrayUpdater, i, in)
+          keyReader(keyArrayUpdater, base + i, in)
+          valueReader(valueArrayUpdater, base + i, in)
           i += 1
+          // keyArray = keyArrayExpander(keyArrayUpdater, if (i == length) 0 
else 1)
+          // valueArray = valueArrayExpander(valueArrayUpdater, if (i == 
length) 0 else 1)
         }
+        base += length.toInt
         length = in.mapNext()
-        keyArray.sizeHint((keyArray.length + length).toInt)
-        valueArray.sizeHint((valueArray.length + length).toInt)
+        keyArray = keyArrayExpander(keyArrayUpdater, length) // if (length == 
0) 0 else 1)
+        valueArray = valueArrayExpander(valueArrayUpdater, length) // if 
(length == 0) 0 else 1)
       }
 
       // The Avro map will never have null or duplicated map keys, it's safe 
to create a
       // ArrayBasedMapData directly here.
-      updater.set(ordinal, new ArrayBasedMapData(toKeyArrayConverter(), 
toValueArrayConverter()))
-      keyArray.clear()
-      valueArray.clear()
+      updater.set(ordinal, new ArrayBasedMapData(keyArray, valueArray))
     }
   }

Review comment:
       Changes to reading maps is the same as the ones to reading arrays 
https://github.com/apache/spark/pull/29354/commits/2c62ac9ae960582058e96860002f7768eebb95f2#r465994403




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to