Hi, could you please clarify whether you are running on a YARN cluster when you see this problem? I tried on Spark standalone and could not reproduce it. If it happens on a YARN cluster, please file a JIRA and I can try to investigate further.
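For reference, here is roughly the kind of repro I ran on standalone. This is only a sketch: the test data, column name, and mapfunc body are my guesses from the traceback below (your actual python1.py is not shown), but it exercises the same collectAsArrowToPython path that appears in your logs. It needs pandas and pyarrow installed on the driver.

from pyspark.sql import SparkSession

spark = (SparkSession.builder
         .appName("arrow-udf-error-repro")
         .config("spark.sql.execution.arrow.enabled", "true")
         .getOrCreate())

def mapfunc(row):
    # 'a' cannot be parsed as a float, so the Python worker raises
    # ValueError: could not convert string to float: 'a'
    return (float(row[0]),)

# Two partitions, so one task succeeds and one fails, as in the logs below.
rdd = spark.sparkContext.parallelize([("1.0",), ("a",)], 2).map(mapfunc)
df = spark.createDataFrame(rdd, "value: double")

# With Arrow enabled, toPandas() collects through collectAsArrowToPython
# (the code path in the traceback); the worker's ValueError should still
# be propagated back to the driver as a PythonException.
df.toPandas()

In my standalone test the ValueError did come back to the driver with either setting of spark.sql.execution.arrow.enabled, hence the question about YARN.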
Thanks,
Bryan

On Sat, Dec 15, 2018 at 3:42 AM 李斌松 <libinsong1...@gmail.com> wrote:

> In Spark 2.4 with Arrow enabled, the error log is not returned to the driver; in Spark 2.3 there is no such problem.
>
> 1. spark.sql.execution.arrow.enabled=true
> [image: image.png]
> *yarn log:*
>
>> 18/12/15 14:35:52 INFO CodeGenerator: Code generated in 1030.698785 ms
>> 18/12/15 14:35:54 INFO PythonRunner: Times: total = 1985, boot = 1892, init = 92, finish = 1
>> 18/12/15 14:35:54 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 1799 bytes result sent to driver
>> 18/12/15 14:35:55 INFO CoarseGrainedExecutorBackend: Got assigned task 1
>> 18/12/15 14:35:55 INFO Executor: Running task 0.0 in stage 1.0 (TID 1)
>> 18/12/15 14:35:55 INFO TorrentBroadcast: Started reading broadcast variable 1
>> 18/12/15 14:35:55 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 8.3 KB, free 1048.8 MB)
>> 18/12/15 14:35:55 INFO TorrentBroadcast: Reading broadcast variable 1 took 18 ms
>> 18/12/15 14:35:55 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 14.0 KB, free 1048.8 MB)
>> 18/12/15 14:35:55 INFO CodeGenerator: Code generated in 30.269745 ms
>> 18/12/15 14:35:55 INFO PythonRunner: Times: total = 13, boot = 5, init = 7, finish = 1
>> 18/12/15 14:35:55 INFO Executor: Finished task 0.0 in stage 1.0 (TID 1). 1893 bytes result sent to driver
>> 18/12/15 14:35:55 INFO CoarseGrainedExecutorBackend: Got assigned task 2
>> 18/12/15 14:35:55 INFO Executor: Running task 1.0 in stage 1.0 (TID 2)
>> 18/12/15 14:35:55 ERROR Executor: Exception in task 1.0 in stage 1.0 (TID 2)
>> org.apache.spark.api.python.PythonException: Traceback (most recent call last):
>>   File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 377, in main
>>     process()
>>   File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 372, in process
>>     serializer.dump_stream(func(split_index, iterator), outfile)
>>   File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/serializers.py", line 390, in dump_stream
>>     vs = list(itertools.islice(iterator, batch))
>>   File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/util.py", line 99, in wrapper
>>     return f(*args, **kwargs)
>>   File "/yarn/nm/usercache/admin/appcache/application_1544579748138_0215/container_e43_1544579748138_0215_01_000001/python1.py", line 435, in mapfunc
>> ValueError: could not convert string to float: 'a'
>>
>>   at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452)
>>   at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588)
>>   at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571)
>>   at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406)
>>   at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
>>   at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
>>   at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
>>   at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
>>   at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
>>   at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.hasNext(ArrowConverters.scala:99)
>>   at scala.collection.Iterator$class.foreach(Iterator.scala:893)
>>   at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.foreach(ArrowConverters.scala:97)
>>   at scala.collection.generic.Growable$class.$plus$plus$eq(Growable.scala:59)
>>   at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:104)
>>   at scala.collection.mutable.ArrayBuffer.$plus$plus$eq(ArrayBuffer.scala:48)
>>   at scala.collection.TraversableOnce$class.to(TraversableOnce.scala:310)
>>   at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.to(ArrowConverters.scala:97)
>>   at scala.collection.TraversableOnce$class.toBuffer(TraversableOnce.scala:302)
>>   at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toBuffer(ArrowConverters.scala:97)
>>   at scala.collection.TraversableOnce$class.toArray(TraversableOnce.scala:289)
>>   at org.apache.spark.sql.execution.arrow.ArrowConverters$$anon$1.toArray(ArrowConverters.scala:97)
>>   at org.apache.spark.sql.Dataset$$anonfun$collectAsArrowToPython$1$$anonfun$apply$17$$anonfun$apply$18.apply(Dataset.scala:3314)
>>   at org.apache.spark.sql.Dataset$$anonfun$collectAsArrowToPython$1$$anonfun$apply$17$$anonfun$apply$18.apply(Dataset.scala:3314)
>>   at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>   at org.apache.spark.scheduler.Task.run(Task.scala:121)
>>   at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
>>   at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
>>   at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
>>   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>>   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>>   at java.lang.Thread.run(Thread.java:748)
>> 18/12/15 14:35:55 INFO CoarseGrainedExecutorBackend: Got assigned task 3
>> 18/12/15 14:35:55 INFO Executor: Running task 1.1 in stage 1.0 (TID 3)
>> 18/12/15 14:35:55 ERROR Executor: Exception in task 1.1 in stage 1.0 (TID 3)
>> [same PythonException and traceback as TID 2]
>> 18/12/15 14:35:55 INFO CoarseGrainedExecutorBackend: Got assigned task 4
>> 18/12/15 14:35:55 INFO Executor: Running task 1.2 in stage 1.0 (TID 4)
>> 18/12/15 14:35:55 ERROR Executor: Exception in task 1.2 in stage 1.0 (TID 4)
>> [same PythonException and traceback as TID 2]
>> 18/12/15 14:35:55 INFO CoarseGrainedExecutorBackend: Got assigned task 5
>> 18/12/15 14:35:55 INFO Executor: Running task 1.3 in stage 1.0 (TID 5)
>> 18/12/15 14:35:56 ERROR Executor: Exception in task 1.3 in stage 1.0 (TID 5)
>> [same PythonException and traceback as TID 2]
>> 18/12/15 14:37:25 ERROR CoarseGrainedExecutorBackend: Executor self-exiting due to : Driver dp199:37391 disassociated! Shutting down.
>> 18/12/15 14:37:25 INFO DiskBlockManager: Shutdown hook called
>> 18/12/15 14:37:25 INFO ShutdownHookManager: Shutdown hook called
>
> 2. spark.sql.execution.arrow.enabled=false
> [image: image.png]
> *yarn log:*
>
>> 18/12/15 14:24:51 INFO CodeGenerator: Code generated in 1850.239094 ms
>> 18/12/15 14:24:51 INFO PythonRunner: Times: total = 2391, boot = 2312, init = 79, finish = 0
>> 18/12/15 14:24:51 INFO PythonRunner: Times: total = 105, boot = 4, init = 100, finish = 1
>> 18/12/15 14:24:51 INFO Executor: Finished task 0.0 in stage 0.0 (TID 0). 1756 bytes result sent to driver
>> 18/12/15 14:24:52 INFO CoarseGrainedExecutorBackend: Got assigned task 1
>> 18/12/15 14:24:52 INFO Executor: Running task 0.0 in stage 1.0 (TID 1)
>> 18/12/15 14:24:52 INFO TorrentBroadcast: Started reading broadcast variable 1
>> 18/12/15 14:24:52 INFO MemoryStore: Block broadcast_1_piece0 stored as bytes in memory (estimated size 8.7 KB, free 1048.8 MB)
>> 18/12/15 14:24:52 INFO TorrentBroadcast: Reading broadcast variable 1 took 11 ms
>> 18/12/15 14:24:52 INFO MemoryStore: Block broadcast_1 stored as values in memory (estimated size 15.4 KB, free 1048.8 MB)
>> 18/12/15 14:24:52 INFO PythonRunner: Times: total = 3, boot = -3617, init = 3620, finish = 0
>> 18/12/15 14:24:52 INFO CodeGenerator: Code generated in 50.625969 ms
>> 18/12/15 14:24:52 INFO PythonRunner: Times: total = 16, boot = 5, init = 10, finish = 1
>> 18/12/15 14:24:52 INFO Executor: Finished task 0.0 in stage 1.0 (TID 1). 1973 bytes result sent to driver
>> 18/12/15 14:24:52 INFO CoarseGrainedExecutorBackend: Got assigned task 2
>> 18/12/15 14:24:52 INFO Executor: Running task 1.0 in stage 1.0 (TID 2)
>> 18/12/15 14:24:52 INFO PythonRunner: Times: total = 48, boot = -72, init = 120, finish = 0
>> 18/12/15 14:24:52 ERROR Executor: Exception in task 1.0 in stage 1.0 (TID 2)
>> org.apache.spark.api.python.PythonException: Traceback (most recent call last):
>>   File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 377, in main
>>     process()
>>   File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/worker.py", line 372, in process
>>     serializer.dump_stream(func(split_index, iterator), outfile)
>>   File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/serializers.py", line 390, in dump_stream
>>     vs = list(itertools.islice(iterator, batch))
>>   File "/usr/install/pyspark/2.4.0/pyspark.zip/pyspark/util.py", line 99, in wrapper
>>     return f(*args, **kwargs)
>>   File "/yarn/nm/usercache/admin/appcache/application_1544579748138_0213/container_e43_1544579748138_0213_01_000001/python1.py", line 435, in mapfunc
>> ValueError: could not convert string to float: 'a'
>>   at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452)
>>   at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:588)
>>   at org.apache.spark.api.python.PythonRunner$$anon$1.read(PythonRunner.scala:571)
>>   at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406)
>>   at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
>>   at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:439)
>>   at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
>>   at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
>>   at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:408)
>>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
>>   at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
>>   at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
>>   at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
>>   at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
>>   at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
>>   at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
>>   at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
>>   at org.apache.spark.scheduler.Task.run(Task.scala:121)
>>   at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:402)
>>   at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
>>   at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:408)
>>   at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>>   at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>>   at java.lang.Thread.run(Thread.java:748)
>> 18/12/15 14:24:52 INFO CoarseGrainedExecutorBackend: Got assigned task 3
>> 18/12/15 14:24:52 INFO Executor: Running task 1.1 in stage 1.0 (TID 3)
>> 18/12/15 14:24:52 INFO PythonRunner: Times: total = 46, boot = -57, init = 102, finish = 1
>> 18/12/15 14:24:52 ERROR Executor: Exception in task 1.1 in stage 1.0 (TID 3)
>> [same PythonException and traceback as TID 2]
>> 18/12/15 14:24:52 INFO CoarseGrainedExecutorBackend: Got assigned task 4
>> 18/12/15 14:24:52 INFO Executor: Running task 1.2 in stage 1.0 (TID 4)
>> 18/12/15 14:24:53 INFO PythonRunner: Times: total = 709, boot = -12, init = 721, finish = 0
>> 18/12/15 14:24:53 ERROR Executor: Exception in task 1.2 in stage 1.0 (TID 4)
>> [same PythonException and traceback as TID 2]
>> 18/12/15 14:24:53 INFO CoarseGrainedExecutorBackend: Got assigned task 5
>> 18/12/15 14:24:53 INFO Executor: Running task 1.3 in stage 1.0 (TID 5)
>> 18/12/15 14:24:53 INFO PythonRunner: Times: total = 2, boot = -24, init = 26, finish = 0
>> 18/12/15 14:24:53 ERROR Executor: Exception in task 1.3 in stage 1.0 (TID 5)
>> [same PythonException and traceback as TID 2]
>> 18/12/15 14:26:39 ERROR CoarseGrainedExecutorBackend: Executor self-exiting due to : Driver dp253:7665 disassociated! Shutting down.
>> 18/12/15 14:26:39 INFO DiskBlockManager: Shutdown hook called
>> 18/12/15 14:26:39 INFO ShutdownHookManager: Shutdown hook called