It looks like the JVM stops immediately on the executor. It's hard to say
why - do you have Java installed there, and a compatible version? I agree
it could be a py4j version problem, per that SO link.
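
One more thing I notice in the trace: the worker dies while unpickling
your function, and the failure runs through filename.py line 10, where
SparkSession.builder.appName("test").getOrCreate() executes at module
import time. If that's right, every executor that imports the module tries
to launch its own JVM gateway, which would fail exactly like this. A
minimal sketch of the usual guard - I'm assuming the rough shape of
filename.py here, and the input path is made up:

from pyspark.sql import SparkSession

def process_logs(row):
    # Pure-Python work on one Row; must not touch the SparkSession,
    # since executors have no driver JVM of their own to talk to.
    print(row)

def read_logs(spark):
    data_df = spark.read.json("logs.json")  # hypothetical input
    data_df.rdd.foreach(process_logs)

if __name__ == "__main__":
    # Only the driver script creates the session; importing this
    # module on a worker then stays side-effect free.
    spark = SparkSession.builder.appName("test").getOrCreate()
    read_logs(spark)
    spark.stop()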

On Sat, May 8, 2021, 1:35 PM rajat kumar <kumar.rajat20...@gmail.com> wrote:

> Hi Sean/Mich,
>
> Thanks for the response.
>
> That was the full log; I am sending it again for reference. I am just
> running foreach (lambda), which runs pure Python code.
>
> Exception in read_logs :  Py4JJavaError Traceback (most recent call last):
>   File "/opt/spark/python/lib/python3.6/site-packages/filename.py", line 42, in read_logs
>     data_df.rdd.foreach(lambda x: process_logs(x))
>   File "/opt/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 789, in foreach
>     self.mapPartitions(processPartition).count()  # Force evaluation
>   File "/opt/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 1055, in count
>     return self.mapPartitions(lambda i: [sum(1 for _ in i)]).sum()
>   File "/opt/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 1046, in sum
>     return self.mapPartitions(lambda x: [sum(x)]).fold(0, operator.add)
>   File "/opt/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 917, in fold
>     vals = self.mapPartitions(func).collect()
>   File "/opt/spark/python/lib/pyspark.zip/pyspark/rdd.py", line 816, in collect
>     sock_info = self.ctx._jvm.PythonRDD.collectAndServe(self._jrdd.rdd())
>   File "/opt/spark/python/lib/py4j-0.10.7-src.zip/py4j/java_gateway.py", line 1257, in __call__
>     answer, self.gateway_client, self.target_id, self.name)
>   File "/opt/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 63, in deco
>     return f(*a, **kw)
>   File "/opt/spark/python/lib/py4j-0.10.7-src.zip/py4j/protocol.py", line 328, in get_return_value
>     format(target_id, ".", name), value)
> py4j.protocol.Py4JJavaError: An error occurred while calling z:org.apache.spark.api.python.PythonRDD.collectAndServe.
> : org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 3.0 failed 4 times, most recent failure: Lost task 1.3 in stage 3.0 (TID 15, 10.244.42.133, executor 1): org.apache.spark.api.python.PythonException: Traceback (most recent call last):
>   File "/opt/spark/python/lib/pyspark.zip/pyspark/worker.py", line 364, in main
>     func, profiler, deserializer, serializer = read_command(pickleSer, infile)
>   File "/opt/spark/python/lib/pyspark.zip/pyspark/worker.py", line 69, in read_command
>     command = serializer._read_with_length(file)
>   File "/opt/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 172, in _read_with_length
>     return self.loads(obj)
>   File "/opt/spark/python/lib/pyspark.zip/pyspark/serializers.py", line 580, in loads
>     return pickle.loads(obj, encoding=encoding)
>   File "/opt/spark/python/lib/python3.6/site-packages/filename.py", line 10, in <module>
>     spark = SparkSession.builder.appName("test").getOrCreate()
>   File "/opt/spark/python/lib/pyspark.zip/pyspark/sql/session.py", line 173, in getOrCreate
>     sc = SparkContext.getOrCreate(sparkConf)
>   File "/opt/spark/python/lib/pyspark.zip/pyspark/context.py", line 367, in getOrCreate
>     SparkContext(conf=conf or SparkConf())
>   File "/opt/spark/python/lib/pyspark.zip/pyspark/context.py", line 133, in __init__
>     SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
>   File "/opt/spark/python/lib/pyspark.zip/pyspark/context.py", line 316, in _ensure_initialized
>     SparkContext._gateway = gateway or launch_gateway(conf)
>   File "/opt/spark/python/lib/pyspark.zip/pyspark/java_gateway.py", line 46, in launch_gateway
>     return _launch_gateway(conf)
>   File "/opt/spark/python/lib/pyspark.zip/pyspark/java_gateway.py", line 108, in _launch_gateway
>     raise Exception("Java gateway process exited before sending its port number")
> Exception: Java gateway process exited before sending its port number
>
