Github user kiszk commented on a diff in the pull request: https://github.com/apache/spark/pull/20850#discussion_r175657704 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/InterpretedUnsafeProjection.scala --- @@ -178,81 +171,76 @@ object InterpretedUnsafeProjection extends UnsafeProjectionCreator { case StructType(fields) => val numFields = fields.length - val rowWriter = new UnsafeRowWriter(bufferHolder, numFields) - val structWriter = generateStructWriter(bufferHolder, rowWriter, fields) + val rowWriter = new UnsafeRowWriter(writer, numFields) + val structWriter = generateStructWriter(rowWriter, fields) (v, i) => { - val tmpCursor = bufferHolder.cursor + rowWriter.markCursor() v.getStruct(i, fields.length) match { case row: UnsafeRow => writeUnsafeData( - bufferHolder, + rowWriter, row.getBaseObject, row.getBaseOffset, row.getSizeInBytes) case row => // Nested struct. We don't know where this will start because a row can be // variable length, so we need to update the offsets and zero out the bit mask. - rowWriter.reset() + rowWriter.resetRowWriter() structWriter.apply(row) } - writer.setOffsetAndSize(i, tmpCursor, bufferHolder.cursor - tmpCursor) + writer.setOffsetAndSizeFromMark(i) } case ArrayType(elementType, containsNull) => - val arrayWriter = new UnsafeArrayWriter - val elementSize = getElementSize(elementType) + val arrayWriter = new UnsafeArrayWriter(writer, getElementSize(elementType)) val elementWriter = generateFieldWriter( - bufferHolder, arrayWriter, elementType, containsNull) (v, i) => { - val tmpCursor = bufferHolder.cursor - writeArray(bufferHolder, arrayWriter, elementWriter, v.getArray(i), elementSize) - writer.setOffsetAndSize(i, tmpCursor, bufferHolder.cursor - tmpCursor) + arrayWriter.markCursor() --- End diff -- From the performance view, this abstraction may have more performance impact since we move temporal value on local frame into [that on Java stack](https://github.com/apache/spark/pull/20850/files#diff-e68c5a074209b9a20ee2aa42936571ceR103) ``` arrayWriter.markCursor() writeArray(arrayWriter, elementWriter, v.getArray(i)) writer.setOffsetAndSizeFromMark(i) ``` Is this implementation enough from the balance of performance and abstraction? Or, is it better to do like this? ``` val mark = arrayWriter.cursor() writeArray(arrayWriter, elementWriter, v.getArray(i)) writer.setOffsetAndSizeFromMark(i, mark) ``` @maropo @hvanhovell WDYT?
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org