msamirkhan commented on a change in pull request #29354: URL: https://github.com/apache/spark/pull/29354#discussion_r465994643
########## File path: external/avro/src/main/scala/org/apache/spark/sql/avro/SparkAvroDatumReader.scala ########## @@ -452,71 +452,73 @@ class SparkAvroDatumReader[T]( private[this] def getArrayReader(avroType: Schema, elementType: DataType, - path: List[String], - reuseObj: Boolean + path: List[String] ): (CatalystDataUpdater, Int, ResolvingDecoder) => Unit = { val elementReader = newReader(avroType.getElementType, elementType, path, false) - val array = new ArrayBuffer[Any] - val arrayUpdater = new ArrayBufferUpdater(array) - val toArrayConverter = getToArrayDataConverter(elementType, array) + val arrayCreator = getArrayDataCreator(elementType) + val arrayExpander = getArrayDataExpander(elementType) (updater, ordinal, in) => { var length = in.readArrayStart() - array.sizeHint(length.toInt) + var array = arrayCreator(length) // if (length == 0) 0 else 1) + val arrayUpdater = new ArrayDataUpdater(array) + var base: Int = 0 while (length > 0) { var i = 0 while (i < length) { - elementReader(arrayUpdater, i, in) + elementReader(arrayUpdater, base + i, in) i += 1 + // array = arrayExpander(arrayUpdater, if (i == length) 0 else 1) } + base += length.toInt length = in.arrayNext() - array.sizeHint((array.length + length).toInt) + array = arrayExpander(arrayUpdater, length) // if (length == 0) 0 else 1) } - updater.set(ordinal, toArrayConverter()) - array.clear() + updater.set(ordinal, array) } } private[this] def getMapReader(avroType: Schema, keyType: DataType, valueType: DataType, - path: List[String], - reuseObj: Boolean + path: List[String] ): (CatalystDataUpdater, Int, ResolvingDecoder) => Unit = { val keyReader = newReader(SchemaBuilder.builder().stringType(), StringType, path, false) - val keyArray = new ArrayBuffer[Any] - val keyArrayUpdater = new ArrayBufferUpdater(keyArray) - val toKeyArrayConverter = getToArrayDataConverter(keyType, keyArray) + val keyArrayCreator = getArrayDataCreator(keyType) + val keyArrayExpander = getArrayDataExpander(keyType) val valueReader = newReader(avroType.getValueType, valueType, path, false) - val valueArray = new ArrayBuffer[Any] - val valueArrayUpdater = new ArrayBufferUpdater(valueArray) - val toValueArrayConverter = getToArrayDataConverter(valueType, valueArray) + val valueArrayCreator = getArrayDataCreator(valueType) + val valueArrayExpander = getArrayDataExpander(valueType) (updater, ordinal, in) => { var length = in.readMapStart() - keyArray.sizeHint(length.toInt) - valueArray.sizeHint(length.toInt) + var keyArray = keyArrayCreator(length) // if (length == 0) 0 else 1) + var valueArray = valueArrayCreator(length) // if (length == 0) 0 else 1) + val keyArrayUpdater = new ArrayDataUpdater(keyArray) + val valueArrayUpdater = new ArrayDataUpdater(valueArray) + var base: Int = 0 while (length > 0) { var i = 0 while (i < length) { - keyReader(keyArrayUpdater, i, in) - valueReader(valueArrayUpdater, i, in) + keyReader(keyArrayUpdater, base + i, in) + valueReader(valueArrayUpdater, base + i, in) i += 1 + // keyArray = keyArrayExpander(keyArrayUpdater, if (i == length) 0 else 1) + // valueArray = valueArrayExpander(valueArrayUpdater, if (i == length) 0 else 1) } + base += length.toInt length = in.mapNext() - keyArray.sizeHint((keyArray.length + length).toInt) - valueArray.sizeHint((valueArray.length + length).toInt) + keyArray = keyArrayExpander(keyArrayUpdater, length) // if (length == 0) 0 else 1) + valueArray = valueArrayExpander(valueArrayUpdater, length) // if (length == 0) 0 else 1) } // The Avro map will never have null or duplicated map keys, it's safe to create a // ArrayBasedMapData directly here. - updater.set(ordinal, new ArrayBasedMapData(toKeyArrayConverter(), toValueArrayConverter())) - keyArray.clear() - valueArray.clear() + updater.set(ordinal, new ArrayBasedMapData(keyArray, valueArray)) } } Review comment: Changes to reading maps is the same as the ones to reading arrays https://github.com/apache/spark/pull/29354/commits/2c62ac9ae960582058e96860002f7768eebb95f2#r465994403 ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org