Marius Stanescu created SPARK-28001:
---------------------------------------
Summary: Dataframe throws 'socket.timeout: timed out' exception
Key: SPARK-28001
URL: https://issues.apache.org/jira/browse/SPARK-28001
Project: Spark
Issue Type: Bug
Components: PySpark
Affects Versions: 2.4.3
Environment:
Processor: Intel Core i7-7700 CPU @ 3.60GHz
RAM: 16 GB
OS: Windows 10 Enterprise 64-bit
Python: 3.7.2
PySpark: 2.4.3
Cluster manager: Spark Standalone
Reporter: Marius Stanescu

I load data from Azure Table Storage, create a DataFrame, run a couple of operations on it via two user-defined functions, then call show() to display the results. If I load a very small batch of items, like 5, everything works fine, but if I load a batch greater than 10 items from Azure Table Storage, I get the 'socket.timeout: timed out' exception.

Here is the code:

{code}
import time
import json
import requests
from requests.auth import HTTPBasicAuth
from azure.cosmosdb.table.tableservice import TableService
from azure.cosmosdb.table.models import Entity
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf, struct
from pyspark.sql.types import BooleanType


def main():
    batch_size = 25
    azure_table_account_name = '***'
    azure_table_account_key = '***'
    azure_table_name = '***'

    spark = SparkSession \
        .builder \
        .appName('***') \
        .config("spark.sql.crossJoin.enabled", "true") \
        .getOrCreate()

    table_service = TableService(account_name=azure_table_account_name,
                                 account_key=azure_table_account_key)

    continuation_token = None
    while True:
        messages = table_service.query_entities(
            azure_table_name,
            select="RowKey, PartitionKey, messageId, ownerSmtp, Timestamp",
            num_results=batch_size,
            marker=continuation_token,
            timeout=60)
        continuation_token = messages.next_marker
        messages_list = list(messages)

        if not len(messages_list):
            time.sleep(5)
            continue  # was 'pass', which would fall through to createDataFrame on an empty list

        messages_df = spark.createDataFrame(messages_list)

        register_records_df = messages_df \
            .withColumn('Registered', register_record(
                'RowKey', 'PartitionKey', 'messageId', 'ownerSmtp', 'Timestamp'))

        only_registered_records_df = register_records_df \
            .filter(register_records_df.Registered == True) \
            .drop(register_records_df.Registered)

        update_message_status_df = only_registered_records_df \
            .withColumn('TableEntryDeleted', delete_table_entity('RowKey', 'PartitionKey'))

        results_df = update_message_status_df.select(
            update_message_status_df.RowKey,
            update_message_status_df.PartitionKey,
            update_message_status_df.TableEntryDeleted)

        #results_df.explain()
        results_df.show(n=batch_size, truncate=False)


@udf(returnType=BooleanType())
def register_record(rowKey, partitionKey, messageId, ownerSmtp, timestamp):
    # call an API
    try:
        url = '{}/data/record/{}'.format('***', rowKey)
        headers = {'Content-type': 'application/json'}
        response = requests.post(
            url,
            headers=headers,
            auth=HTTPBasicAuth('***', '***'),
            data=prepare_record_data(rowKey, partitionKey, messageId, ownerSmtp, timestamp))
        return bool(response)
    except:
        return False


def prepare_record_data(rowKey, partitionKey, messageId, ownerSmtp, timestamp):
    record_data = {
        "Title": messageId,
        "Type": '***',
        "Source": '***',
        "Creator": ownerSmtp,
        "Publisher": '***',
        "Date": timestamp.strftime('%Y-%m-%dT%H:%M:%SZ')
    }
    return json.dumps(record_data)


@udf(returnType=BooleanType())
def delete_table_entity(row_key, partition_key):
    azure_table_account_name = '***'
    azure_table_account_key = '***'
    azure_table_name = '***'

    try:
        table_service = TableService(account_name=azure_table_account_name,
                                     account_key=azure_table_account_key)
        table_service.delete_entity(azure_table_name, partition_key, row_key)
        return True
    except:
        return False


if __name__ == "__main__":
    main()
{code}
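Note: the original snippet passed an undefined name (agent_name) to appName; it is masked as '***' above like the other constants. One possible workaround (a sketch only, untested; process_partition is an illustrative name, and it reuses prepare_record_data from the code above) would be to move the row-level REST calls out of the Python UDF into mapPartitions, so each Python worker handles a whole partition with one HTTP session instead of blocking once per row:

{code}
# Sketch of a possible workaround, untested against this bug: perform the
# REST calls per partition via mapPartitions instead of per row via a UDF,
# reusing one HTTP session so the worker blocks for less time per record.
import requests
from requests.auth import HTTPBasicAuth

def process_partition(rows):
    # One session per partition instead of one connection per row.
    session = requests.Session()
    session.auth = HTTPBasicAuth('***', '***')
    session.headers.update({'Content-type': 'application/json'})
    for row in rows:
        try:
            response = session.post(
                '{}/data/record/{}'.format('***', row['RowKey']),
                data=prepare_record_data(row['RowKey'], row['PartitionKey'],
                                         row['messageId'], row['ownerSmtp'],
                                         row['Timestamp']))
            yield (row['RowKey'], row['PartitionKey'], bool(response))
        except requests.RequestException:
            yield (row['RowKey'], row['PartitionKey'], False)

# Collect (RowKey, PartitionKey, Registered) tuples without a UDF pass.
registered = messages_df.rdd.mapPartitions(process_partition).collect()
{code}

This avoids the BatchEvalPython step for the registration pass, although it does not explain why the UDF version times out.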
Here is the console output:

{noformat}
== Physical Plan ==
*(2) Project [RowKey#54, PartitionKey#53, pythonUDF0#93 AS TableEntryDeleted#81]
+- BatchEvalPython [delete_table_entity(RowKey#54, PartitionKey#53)], [PartitionKey#53, RowKey#54, pythonUDF0#93]
   +- *(1) Project [PartitionKey#53, RowKey#54]
      +- *(1) Project [PartitionKey#53, RowKey#54, Timestamp#55, etag#56, messageId#57, ownerSmtp#58]
         +- *(1) Filter (pythonUDF0#92 = true)
            +- BatchEvalPython [register_record(RowKey#54, PartitionKey#53, messageId#57, ownerSmtp#58, Timestamp#55)], [PartitionKey#53, RowKey#54, Timestamp#55, etag#56, messageId#57, ownerSmtp#58, pythonUDF0#92]
               +- Scan ExistingRDD[PartitionKey#53,RowKey#54,Timestamp#55,etag#56,messageId#57,ownerSmtp#58]

[Stage 5:=======================================> (2 + 1) / 3]
19/06/11 16:32:49 ERROR Executor: Exception in task 2.0 in stage 5.0 (TID 15)
org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "D:\Spark\python\lib\pyspark.zip\pyspark\worker.py", line 362, in main
  File "D:\Spark\python\lib\pyspark.zip\pyspark\serializers.py", line 715, in read_int
    length = stream.read(4)
  File "C:\Python37\lib\socket.py", line 589, in readinto
    return self._sock.recv_into(b)
socket.timeout: timed out

    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452)
    at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$1.read(PythonUDFRunner.scala:81)
    at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$1.read(PythonUDFRunner.scala:64)
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
    at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:121)
    at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
19/06/11 16:32:49 WARN TaskSetManager: Lost task 2.0 in stage 5.0 (TID 15, localhost, executor driver): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "D:\Spark\python\lib\pyspark.zip\pyspark\worker.py", line 362, in main
  File "D:\Spark\python\lib\pyspark.zip\pyspark\serializers.py", line 715, in read_int
    length = stream.read(4)
  File "C:\Python37\lib\socket.py", line 589, in readinto
    return self._sock.recv_into(b)
socket.timeout: timed out

    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452)
    at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$1.read(PythonUDFRunner.scala:81)
    at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$1.read(PythonUDFRunner.scala:64)
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
    at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:121)
    at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)
19/06/11 16:32:49 ERROR TaskSetManager: Task 2 in stage 5.0 failed 1 times; aborting job
Traceback (most recent call last):
  File "c:\Users\Marius Stanescu\.vscode\extensions\ms-python.python-2019.5.18875\pythonFiles\ptvsd_launcher.py", line 43, in <module>
    main(ptvsdArgs)
  File "c:\Users\Marius Stanescu\.vscode\extensions\ms-python.python-2019.5.18875\pythonFiles\lib\python\ptvsd\__main__.py", line 434, in main
    run()
  File "c:\Users\Marius Stanescu\.vscode\extensions\ms-python.python-2019.5.18875\pythonFiles\lib\python\ptvsd\__main__.py", line 312, in run_file
    runpy.run_path(target, run_name='__main__')
  File "C:\Python37\lib\runpy.py", line 263, in run_path
    pkg_name=pkg_name, script_name=fname)
  File "C:\Python37\lib\runpy.py", line 96, in _run_module_code
    mod_name, mod_spec, pkg_name, script_name)
  File "C:\Python37\lib\runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "c:\Projects\Goldberg\Goldberg\spark\application\load\smtp-to-narro\v1\smtp-to-narro.py", line 123, in <module>
    main()
  File "c:\Projects\Goldberg\Goldberg\spark\application\load\smtp-to-narro\v1\smtp-to-narro.py", line 63, in main
    results_df.show(n=batch_size, truncate=True)
  File "C:\Python37\lib\site-packages\pyspark\sql\dataframe.py", line 378, in show
    print(self._jdf.showString(n, 20, vertical))
  File "C:\Python37\lib\site-packages\py4j\java_gateway.py", line 1257, in __call__
    answer, self.gateway_client, self.target_id, self.name)
  File "C:\Python37\lib\site-packages\pyspark\sql\utils.py", line 63, in deco
    return f(*a, **kw)
  File "C:\Python37\lib\site-packages\py4j\protocol.py", line 328, in get_return_value
    format(target_id, ".", name), value)
py4j.protocol.Py4JJavaError: An error occurred while calling o154.showString.
: org.apache.spark.SparkException: Job aborted due to stage failure: Task 2 in stage 5.0 failed 1 times, most recent failure: Lost task 2.0 in stage 5.0 (TID 15, localhost, executor driver): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "D:\Spark\python\lib\pyspark.zip\pyspark\worker.py", line 362, in main
  File "D:\Spark\python\lib\pyspark.zip\pyspark\serializers.py", line 715, in read_int
    length = stream.read(4)
  File "C:\Python37\lib\socket.py", line 589, in readinto
    return self._sock.recv_into(b)
socket.timeout: timed out

    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452)
    at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$1.read(PythonUDFRunner.scala:81)
    at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$1.read(PythonUDFRunner.scala:64)
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
    at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:121)
    at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    at java.lang.Thread.run(Thread.java:748)

Driver stacktrace:
    at org.apache.spark.scheduler.DAGScheduler.org$apache$spark$scheduler$DAGScheduler$$failJobAndIndependentStages(DAGScheduler.scala:1889)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1877)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$abortStage$1.apply(DAGScheduler.scala:1876)
    at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
    at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
    at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:1876)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
    at org.apache.spark.scheduler.DAGScheduler$$anonfun$handleTaskSetFailed$1.apply(DAGScheduler.scala:926)
    at scala.Option.foreach(Option.scala:257)
    at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:926)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2110)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2059)
    at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2048)
    at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
    at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:737)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2061)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2082)
    at org.apache.spark.SparkContext.runJob(SparkContext.scala:2101)
    at org.apache.spark.sql.execution.SparkPlan.executeTake(SparkPlan.scala:365)
    at org.apache.spark.sql.execution.CollectLimitExec.executeCollect(limit.scala:38)
    at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$collectFromPlan(Dataset.scala:3383)
    at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2544)
    at org.apache.spark.sql.Dataset$$anonfun$head$1.apply(Dataset.scala:2544)
    at org.apache.spark.sql.Dataset$$anonfun$53.apply(Dataset.scala:3364)
    at org.apache.spark.sql.execution.SQLExecution$$anonfun$withNewExecutionId$1.apply(SQLExecution.scala:78)
    at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:125)
    at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:73)
    at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3363)
    at org.apache.spark.sql.Dataset.head(Dataset.scala:2544)
    at org.apache.spark.sql.Dataset.take(Dataset.scala:2758)
    at org.apache.spark.sql.Dataset.getRows(Dataset.scala:254)
    at org.apache.spark.sql.Dataset.showString(Dataset.scala:291)
    at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
    at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
    at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
    at java.lang.reflect.Method.invoke(Method.java:498)
    at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
    at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
    at py4j.Gateway.invoke(Gateway.java:282)
    at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
    at py4j.commands.CallCommand.execute(CallCommand.java:79)
    at py4j.GatewayConnection.run(GatewayConnection.java:238)
    at java.lang.Thread.run(Thread.java:748)
Caused by: org.apache.spark.api.python.PythonException: Traceback (most recent call last):
  File "D:\Spark\python\lib\pyspark.zip\pyspark\worker.py", line 362, in main
  File "D:\Spark\python\lib\pyspark.zip\pyspark\serializers.py", line 715, in read_int
    length = stream.read(4)
  File "C:\Python37\lib\socket.py", line 589, in readinto
    return self._sock.recv_into(b)
socket.timeout: timed out

    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.handlePythonException(PythonRunner.scala:452)
    at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$1.read(PythonUDFRunner.scala:81)
    at org.apache.spark.sql.execution.python.PythonUDFRunner$$anon$1.read(PythonUDFRunner.scala:64)
    at org.apache.spark.api.python.BasePythonRunner$ReaderIterator.hasNext(PythonRunner.scala:406)
    at org.apache.spark.InterruptibleIterator.hasNext(InterruptibleIterator.scala:37)
    at scala.collection.Iterator$$anon$12.hasNext(Iterator.scala:440)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
    at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:409)
    at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
    at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
    at org.apache.spark.sql.execution.WholeStageCodegenExec$$anonfun$13$$anon$1.hasNext(WholeStageCodegenExec.scala:636)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:255)
    at org.apache.spark.sql.execution.SparkPlan$$anonfun$2.apply(SparkPlan.scala:247)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
    at org.apache.spark.rdd.RDD$$anonfun$mapPartitionsInternal$1$$anonfun$apply$24.apply(RDD.scala:836)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
    at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
    at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:324)
    at org.apache.spark.rdd.RDD.iterator(RDD.scala:288)
    at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90)
    at org.apache.spark.scheduler.Task.run(Task.scala:121)
    at org.apache.spark.executor.Executor$TaskRunner$$anonfun$10.apply(Executor.scala:408)
    at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1360)
    at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:414)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
    ... 1 more
SUCCESS: The process with PID 7324 (child process of PID 9648) has been terminated.
SUCCESS: The process with PID 9648 (child process of PID 11864) has been terminated.
SUCCESS: The process with PID 11864 (child process of PID 14332) has been terminated.
SUCCESS: The process with PID 14332 (child process of PID 1060) has been terminated.
SUCCESS: The process with PID 1060 (child process of PID 3524) has been terminated.
{noformat}
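For what it's worth, the Azure-specific parts may not be needed to reproduce this: a Python UDF that simply blocks for a while should exercise the same worker/socket read path. The following minimal sketch is hypothetical and unverified (slow_udf and the 30-second sleep are stand-ins for the blocking REST/Azure calls in the report):

{code}
# Hypothetical minimal repro, not verified to trigger the timeout:
# a deliberately slow Python UDF, no Azure Table Storage or REST calls.
import time
from pyspark.sql import SparkSession
from pyspark.sql.functions import udf
from pyspark.sql.types import BooleanType

spark = SparkSession.builder.appName('slow-udf-repro').getOrCreate()

@udf(returnType=BooleanType())
def slow_udf(value):
    time.sleep(30)  # stand-in for the blocking per-row REST/Azure work
    return True

# 25 rows mirrors the failing batch_size; 5 rows mirrors the passing case.
df = spark.range(25).withColumn('ok', slow_udf('id'))
df.show(n=25, truncate=False)
{code}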