[ 
https://issues.apache.org/jira/browse/SPARK-34020?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Darshat updated SPARK-34020:
----------------------------
    Description: 
We are using Databricks on Azure, with Apache Spark 3.0.0 and Scala 2.12. When 
two tables are joined - one with 36 million rows, the other with 4K rows - we get 
an IndexOutOfBoundsException with Arrow on the call stack.
The cluster has 72 nodes and 288 cores, and workers have 16 GB of memory overall. 
{{spark.sql.shuffle.partitions}} is set to 288.
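
Roughly, the job has the shape sketched below. All table and column names and 
the pandas UDF step are illustrative placeholders, not our actual code; we 
include a UDF only because the Arrow frames in the stack trace indicate the 
crash happens while record batches are serialized to a Python worker.

{code:python}
from pyspark.sql import SparkSession
from pyspark.sql.functions import pandas_udf
import pandas as pd

spark = (
    SparkSession.builder
    .config("spark.sql.shuffle.partitions", "288")  # as on our cluster
    .getOrCreate()
)

large_df = spark.table("large_table")  # ~36 million rows (placeholder name)
small_df = spark.table("small_table")  # ~4K rows (placeholder name)

# Plain equi-join on a single key ("join_key" is a placeholder).
joined = large_df.join(small_df, on="join_key", how="inner")

# Arrow-backed pandas UDF over the joined frame; the exception is thrown
# while Spark writes Arrow record batches to the Python worker.
@pandas_udf("string")
def normalize(s: pd.Series) -> pd.Series:
    return s.str.strip()

joined.withColumn("clean", normalize("some_string_col")) \
    .write.format("noop").mode("overwrite").save()
{code}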

Suspecting an uneven distribution of the join key, we also tried repartitioning 
into 1000 partitions on the join key using repartition, but this results in the 
same error (sketched below).
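
A sketch of that attempt (same placeholder names as in the first sketch):

{code:python}
# Attempted mitigation for possible key skew: hash-partition the large
# frame on the join key before joining. This fails with the same error.
repartitioned = large_df.repartition(1000, "join_key")
joined = repartitioned.join(small_df, on="join_key", how="inner")
{code}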

Any pointers on what could be causing this issue would be very helpful. Thanks,

Darshat

{noformat}
21/01/06 04:05:06 ERROR ArrowPythonRunner: Python worker exited unexpectedly (crashed)
org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "/databricks/spark/python/pyspark/worker.py", line 640, in main
    eval_type = read_int(infile)
  File "/databricks/spark/python/pyspark/serializers.py", line 603, in read_int
    raise EOFError
EOFError
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:585)
    at org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:99)
    at org.apache.spark.sql.execution.python.PythonArrowOutput$$anon$1.read(PythonArrowOutput.scala:49)
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:538)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:489)
    at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage16.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:731)
    at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:458)
    at org.apache.spark.shuffle.sort.UnsafeShuffleWriter.write(UnsafeShuffleWriter.java:177)
    at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:99)
    at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:52)
    at org.apache.spark.scheduler.Task.doRunTask(Task.scala:144)
    at org.apache.spark.scheduler.Task.run(Task.scala:117)
    at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$9(Executor.scala:639)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1559)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:642)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
Caused by: java.lang.IndexOutOfBoundsException: index: 0, length: 1073741824 (expected: range(0, 0))
    at io.netty.buffer.ArrowBuf.checkIndex(ArrowBuf.java:716)
    at io.netty.buffer.ArrowBuf.setBytes(ArrowBuf.java:954)
    at org.apache.arrow.vector.BaseVariableWidthVector.reallocDataBuffer(BaseVariableWidthVector.java:508)
    at org.apache.arrow.vector.BaseVariableWidthVector.handleSafe(BaseVariableWidthVector.java:1239)
    at org.apache.arrow.vector.BaseVariableWidthVector.setSafe(BaseVariableWidthVector.java:1066)
    at org.apache.spark.sql.execution.arrow.StringWriter.setValue(ArrowWriter.scala:278)
    at org.apache.spark.sql.execution.arrow.ArrowFieldWriter.write(ArrowWriter.scala:139)
    at org.apache.spark.sql.execution.arrow.ArrowWriter.write(ArrowWriter.scala:93)
    at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.$anonfun$writeIteratorToStream$1(ArrowPythonRunner.scala:100)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1559)
    at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.writeIteratorToStream(ArrowPythonRunner.scala:122)
    at org.apache.spark.api.python.BasePythonRunner$WriterThread.$anonfun$run$1(PythonRunner.scala:465)
    at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:2124)
    at org.apache.spark.api.python.BasePythonRunner$WriterThread.run(PythonRunner.scala:257)
21/01/06 04:05:06 ERROR ArrowPythonRunner: This may have been caused by a prior exception:
java.lang.IndexOutOfBoundsException: index: 0, length: 1073741824 (expected: range(0, 0))
    at io.netty.buffer.ArrowBuf.checkIndex(ArrowBuf.java:716)
    at io.netty.buffer.ArrowBuf.setBytes(ArrowBuf.java:954)
    at org.apache.arrow.vector.BaseVariableWidthVector.reallocDataBuffer(BaseVariableWidthVector.java:508)
    at org.apache.arrow.vector.BaseVariableWidthVector.handleSafe(BaseVariableWidthVector.java:1239)
    at org.apache.arrow.vector.BaseVariableWidthVector.setSafe(BaseVariableWidthVector.java:1066)
    at org.apache.spark.sql.execution.arrow.StringWriter.setValue(ArrowWriter.scala:278)
    at org.apache.spark.sql.execution.arrow.ArrowFieldWriter.write(ArrowWriter.scala:139)
    at org.apache.spark.sql.execution.arrow.ArrowWriter.write(ArrowWriter.scala:93)
    at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.$anonfun$writeIteratorToStream$1(ArrowPythonRunner.scala:100)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1559)
    at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.writeIteratorToStream(ArrowPythonRunner.scala:122)
    at org.apache.spark.api.python.BasePythonRunner$WriterThread.$anonfun$run$1(PythonRunner.scala:465)
    at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:2124)
    at org.apache.spark.api.python.BasePythonRunner$WriterThread.run(PythonRunner.scala:257)
21/01/06 04:05:06 ERROR Executor: Exception in task 84.3 in stage 4349.0 (TID 83266)
java.lang.IndexOutOfBoundsException: index: 0, length: 1073741824 (expected: range(0, 0))
    at io.netty.buffer.ArrowBuf.checkIndex(ArrowBuf.java:716)
    at io.netty.buffer.ArrowBuf.setBytes(ArrowBuf.java:954)
    at org.apache.arrow.vector.BaseVariableWidthVector.reallocDataBuffer(BaseVariableWidthVector.java:508)
    at org.apache.arrow.vector.BaseVariableWidthVector.handleSafe(BaseVariableWidthVector.java:1239)
    at org.apache.arrow.vector.BaseVariableWidthVector.setSafe(BaseVariableWidthVector.java:1066)
    at org.apache.spark.sql.execution.arrow.StringWriter.setValue(ArrowWriter.scala:278)
    at org.apache.spark.sql.execution.arrow.ArrowFieldWriter.write(ArrowWriter.scala:139)
    at org.apache.spark.sql.execution.arrow.ArrowWriter.write(ArrowWriter.scala:93)
    at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.$anonfun$writeIteratorToStream$1(ArrowPythonRunner.scala:100)
    at scala.runtime.java8.JFunction0$mcV$sp.apply(JFunction0$mcV$sp.java:23)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1559)
    at org.apache.spark.sql.execution.python.ArrowPythonRunner$$anon$1.writeIteratorToStream(ArrowPythonRunner.scala:122)
    at org.apache.spark.api.python.BasePythonRunner$WriterThread.$anonfun$run$1(PythonRunner.scala:465)
    at org.apache.spark.util.Utils$.logUncaughtExceptions(Utils.scala:2124)
    at org.apache.spark.api.python.BasePythonRunner$WriterThread.run(PythonRunner.scala:257)
{noformat}
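
For what it's worth, 1073741824 bytes is exactly 1 GiB (2^30), and the failing 
frames ({{BaseVariableWidthVector.reallocDataBuffer}} via {{StringWriter.setValue}}) 
are Arrow growing the data buffer of a string column while a record batch is 
written to the Python worker. If a single batch is outgrowing that buffer, 
capping the batch size might be a workaround; a sketch, assuming the standard 
PySpark Arrow batching config applies on Databricks:

{code:python}
# Untested workaround sketch: keep each Arrow record batch small so the
# variable-width (string) data buffer stays well below 1 GiB.
# The default for this key is 10000; 2000 is an arbitrary example value.
spark.conf.set("spark.sql.execution.arrow.maxRecordsPerBatch", "2000")
{code}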

> IndexOutOfBoundsException on merge of two pyspark frames
> --------------------------------------------------------
>
>                 Key: SPARK-34020
>                 URL: https://issues.apache.org/jira/browse/SPARK-34020
>             Project: Spark
>          Issue Type: Bug
>          Components: PySpark
>    Affects Versions: 3.0.0
>            Reporter: Darshat
>            Priority: Major



--
This message was sent by Atlassian Jira
(v8.3.4#803005)
