Tim Lee created SPARK-56502:
-------------------------------
Summary: Fix integer overflow in DirectByteBufferOutputStream
capacity check
Key: SPARK-56502
URL: https://issues.apache.org/jira/browse/SPARK-56502
Project: Spark
Issue Type: Bug
Components: Spark Core
Affects Versions: 4.1.1, 4.0.2, 4.0.1, 4.0.0, 4.1.0, 4.2.0
Reporter: Tim Lee
{{DirectByteBufferOutputStream.write(b, off, len)}} computes
{{buffer.position() + len}} using {{Int}} arithmetic. When the sum exceeds
{{Integer.MAX_VALUE}}, it wraps to a negative value, bypassing the
{{ensureCapacity}} guard. The subsequent {{buffer.put()}} writes past the end
of the buffer, causing a SIGSEGV.
Here's an example JVM crash stack trace:
{code:java}
J 17470 c2 org.apache.spark.util.DirectByteBufferOutputStream.write([BII)V (25
bytes) @ 0x00007f47dcaaa7d2 [0x00007f47dcaaa5c0+0x0000000000000212]
J 30250 c2
java.nio.channels.Channels$WritableByteChannelImpl.write(Ljava/nio/ByteBuffer;)I
[email protected] (151 bytes) @ 0x00007f47dd81ffd4
[0x00007f47dd81fd00+0x00000000000002d4]
J 21322 c1
org.apache.arrow.vector.ipc.WriteChannel.write(Ljava/nio/ByteBuffer;)J (64
bytes) @ 0x00007f47cdb37184 [0x00007f47cdb36d20+0x0000000000000464]
J 23536 c1 org.apache.arrow.vector.ipc.WriteChannel.writeIntLittleEndian(I)J
(17 bytes) @ 0x00007f47cdd1fe8c [0x00007f47cdd1fce0+0x00000000000001ac]
j
org.apache.arrow.vector.ipc.message.MessageSerializer.serialize(Lorg/apache/arrow/vector/ipc/WriteChannel;Lorg/apache/arrow/vector/ipc/message/ArrowRecordBatch;Lorg/apache/arrow/vector/ipc/message/IpcOption;)Lorg/apache/arrow/vector/ipc/message/ArrowBlock;+58
j
org.apache.arrow.vector.ipc.ArrowWriter.writeRecordBatch(Lorg/apache/arrow/vector/ipc/message/ArrowRecordBatch;)Lorg/apache/arrow/vector/ipc/message/ArrowBlock;+9
j org.apache.arrow.vector.ipc.ArrowWriter.writeBatch()V+28
J 29845 c1
org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeNextBatchToArrowStream(Lorg/apache/arrow/vector/VectorSchemaRoot;Lorg/apache/arrow/vector/ipc/ArrowStreamWriter;Ljava/io/DataOutputStream;Lscala/collection/Iterator;)Z
(233 bytes) @ 0x00007f47cde9603c [0x00007f47cde949e0+0x000000000000165c]
j
org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeNextBatchToArrowStream$(Lorg/apache/spark/sql/execution/python/BasicPythonArrowInput;Lorg/apache/arrow/vector/VectorSchemaRoot;Lorg/apache/arrow/vector/ipc/ArrowStreamWriter;Ljava/io/DataOutputStream;Lscala/collection/Iterator;)Z+6
j
org.apache.spark.sql.execution.python.BaseArrowPythonRunner.writeNextBatchToArrowStream(Lorg/apache/arrow/vector/VectorSchemaRoot;Lorg/apache/arrow/vector/ipc/ArrowStreamWriter;Ljava/io/DataOutputStream;Lscala/collection/Iterator;)Z+6
j
org.apache.spark.sql.execution.python.PythonArrowInput$ArrowWriter.writeNextInputToStream(Ljava/io/DataOutputStream;)Z+128
J 29886 c1
org.apache.spark.api.python.BasePythonRunner$ReaderInputStream.writeAdditionalInputToPythonWorker()V
(377 bytes) @ 0x00007f47ce29f8c4 [0x00007f47ce29efa0+0x0000000000000924]
j
org.apache.spark.api.python.BasePythonRunner$ReaderInputStream.read([BII)I+204
J 20370 c2 java.io.BufferedInputStream.fill()V [email protected] (203 bytes) @
0x00007f47dd129790 [0x00007f47dd129720+0x0000000000000070]
J 12173 c2 java.io.BufferedInputStream.read()I [email protected] (49 bytes) @
0x00007f47dc7cc48c [0x00007f47dc7cc3a0+0x00000000000000ec]
J 8989 c1 java.io.DataInputStream.readInt()I [email protected] (68 bytes) @
0x00007f47cd16d40c [0x00007f47cd16d300+0x000000000000010c]
j
org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read()Ljava/lang/Object;+216
j org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext()Z+16
j org.apache.spark.InterruptibleIterator.hasNext()Z+11
j scala.collection.Iterator$$anon$11.hasNext()Z+16
J 20285 c2 scala.collection.Iterator$$anon$10.hasNext()Z (10 bytes) @
0x00007f47dd1230e0 [0x00007f47dd1230a0+0x0000000000000040]
j
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage22.processNext()V+739
j org.apache.spark.sql.execution.BufferedRowIterator.hasNext()Z+11
j
org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext()Z+4
J 20285 c2 scala.collection.Iterator$$anon$10.hasNext()Z (10 bytes) @
0x00007f47dd1230e0 [0x00007f47dd1230a0+0x0000000000000040]
J 20285 c2 scala.collection.Iterator$$anon$10.hasNext()Z (10 bytes) @
0x00007f47dd1230e0 [0x00007f47dd1230a0+0x0000000000000040]
j org.apache.spark.sql.execution.python.BatchIterator.hasNext()Z+4
j
org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeNextBatchToArrowStream(Lorg/apache/arrow/vector/VectorSchemaRoot;Lorg/apache/arrow/vector/ipc/ArrowStreamWriter;Ljava/io/DataOutputStream;Lscala/collection/Iterator;)Z+16
j
org.apache.spark.sql.execution.python.BasicPythonArrowInput.writeNextBatchToArrowStream$(Lorg/apache/spark/sql/execution/python/BasicPythonArrowInput;Lorg/apache/arrow/vector/VectorSchemaRoot;Lorg/apache/arrow/vector/ipc/ArrowStreamWriter;Ljava/io/DataOutputStream;Lscala/collection/Iterator;)Z+6
j
org.apache.spark.sql.execution.python.BaseArrowPythonRunner.writeNextBatchToArrowStream(Lorg/apache/arrow/vector/VectorSchemaRoot;Lorg/apache/arrow/vector/ipc/ArrowStreamWriter;Ljava/io/DataOutputStream;Lscala/collection/Iterator;)Z+6
j
org.apache.spark.sql.execution.python.PythonArrowInput$ArrowWriter.writeNextInputToStream(Ljava/io/DataOutputStream;)Z+128
j
org.apache.spark.api.python.BasePythonRunner$ReaderInputStream.writeAdditionalInputToPythonWorker()V+107
j
org.apache.spark.api.python.BasePythonRunner$ReaderInputStream.read([BII)I+204
J 20370 c2 java.io.BufferedInputStream.fill()V [email protected] (203 bytes) @
0x00007f47dd129790 [0x00007f47dd129720+0x0000000000000070]
J 12173 c2 java.io.BufferedInputStream.read()I [email protected] (49 bytes) @
0x00007f47dc7cc48c [0x00007f47dc7cc3a0+0x00000000000000ec]
J 8989 c1 java.io.DataInputStream.readInt()I [email protected] (68 bytes) @
0x00007f47cd16d40c [0x00007f47cd16d300+0x000000000000010c]
j
org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read()Ljava/lang/Object;+216
j org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext()Z+16
...{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)
---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org