That looks to me like you have two different versions of Spark in use somewhere here. The serialVersionUID mismatch on TorrentBroadcast means the executors are deserializing with a different Spark build than the one the driver serialized with, i.e. the cluster and driver versions aren't quite the same. Check your classpaths?
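A quick way to confirm is to compare what the driver reports against what each executor's Python worker imports. Rough sketch below, not a definitive check: it assumes an existing SparkSession, that pyspark is importable on the executors' PYTHONPATH (which normally sits next to the jars that would be stale), and report_version is just a name I made up:

from pyspark.sql import SparkSession

spark = SparkSession.builder.getOrCreate()
sc = spark.sparkContext

# Version the driver is running.
print("driver:", spark.version)

# Ask each executor-side Python worker which pyspark it imports; the
# pyspark installed on a node usually matches the Spark jars on that node.
def report_version(_):
    import pyspark
    yield pyspark.__version__

executor_versions = set(
    sc.parallelize(range(8), 8).mapPartitions(report_version).collect()
)
print("executors:", executor_versions)

Caveat: if the builds are badly mismatched, the probe job itself may die with the same InvalidClassException, which answers the question just as well. Either way, the fix is making sure the driver and every worker node point at the same 3.0.2 install, SPARK_HOME and all.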
On Fri, Feb 26, 2021 at 2:53 AM Bode, Meikel, NMA-CFD <meikel.b...@bertelsmann.de> wrote:

> Hi All,
>
> After changing to 3.0.2 I face the following issue. Thanks for any hint on
> that issue.
>
> Best,
> Meikel
>
> df = self.spark.read.json(path_in)
>   File "/opt/spark/python/lib/pyspark.zip/pyspark/sql/readwriter.py", line 300, in json
>   File "/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/java_gateway.py", line 1304, in __call__
>   File "/opt/spark/python/lib/pyspark.zip/pyspark/sql/utils.py", line 128, in deco
>   File "/opt/spark/python/lib/py4j-0.10.9-src.zip/py4j/protocol.py", line 326, in get_return_value
> py4j.protocol.Py4JJavaError: An error occurred while calling o76.json.
> : org.apache.spark.SparkException: Job aborted due to stage failure: Task 0 in stage 0.0 failed 4 times, most recent failure: Lost task 0.3 in stage 0.0 (TID 14, 192.168.1.6, executor 0): java.io.InvalidClassException: org.apache.spark.broadcast.TorrentBroadcast; local class incompatible: stream classdesc serialVersionUID = 4804550167553929379, local class serialVersionUID = 3291767831129286585
>         at java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:699)
>         at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:2003)
>         at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1850)
>         at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2160)
>         at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
>         at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
>         at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
>         at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
>         at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
>         at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
>         at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
>         at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:76)
>         at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:115)
>         at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:407)
>         at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>         at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>         at java.lang.Thread.run(Thread.java:748)
>
> Driver stacktrace:
>         at org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2059)
>         at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2008)
>         at org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2007)
>         at scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62)
>         at scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55)
>         at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49)
>         at org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2007)
>         at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:973)
>         at org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:973)
>         at scala.Option.foreach(Option.scala:407)
>         at org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:973)
>         at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:2239)
>         at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2188)
>         at org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2177)
>         at org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
>         at org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:775)
>         at org.apache.spark.SparkContext.runJob(SparkContext.scala:2114)
>         at org.apache.spark.SparkContext.runJob(SparkContext.scala:2209)
>         at org.apache.spark.sql.catalyst.json.JsonInferSchema.infer(JsonInferSchema.scala:94)
>         at org.apache.spark.sql.execution.datasources.json.TextInputJsonDataSource$.$anonfun$inferFromDataset$5(JsonDataSource.scala:110)
>         at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:160)
>         at org.apache.spark.sql.execution.datasources.json.TextInputJsonDataSource$.inferFromDataset(JsonDataSource.scala:110)
>         at org.apache.spark.sql.execution.datasources.json.TextInputJsonDataSource$.infer(JsonDataSource.scala:99)
>         at org.apache.spark.sql.execution.datasources.json.JsonDataSource.inferSchema(JsonDataSource.scala:65)
>         at org.apache.spark.sql.execution.datasources.json.JsonFileFormat.inferSchema(JsonFileFormat.scala:61)
>         at org.apache.spark.sql.execution.datasources.DataSource.$anonfun$getOrInferFileFormatSchema$11(DataSource.scala:208)
>         at scala.Option.orElse(Option.scala:447)
>         at org.apache.spark.sql.execution.datasources.DataSource.getOrInferFileFormatSchema(DataSource.scala:205)
>         at org.apache.spark.sql.execution.datasources.DataSource.resolveRelation(DataSource.scala:418)
>         at org.apache.spark.sql.DataFrameReader.loadV1Source(DataFrameReader.scala:297)
>         at org.apache.spark.sql.DataFrameReader.$anonfun$load$2(DataFrameReader.scala:286)
>         at scala.Option.getOrElse(Option.scala:189)
>         at org.apache.spark.sql.DataFrameReader.load(DataFrameReader.scala:286)
>         at org.apache.spark.sql.DataFrameReader.json(DataFrameReader.scala:477)
>         at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
>         at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
>         at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>         at java.lang.reflect.Method.invoke(Method.java:498)
>         at py4j.reflection.MethodInvoker.invoke(MethodInvoker.java:244)
>         at py4j.reflection.ReflectionEngine.invoke(ReflectionEngine.java:357)
>         at py4j.Gateway.invoke(Gateway.java:282)
>         at py4j.commands.AbstractCommand.invokeMethod(AbstractCommand.java:132)
>         at py4j.commands.CallCommand.execute(CallCommand.java:79)
>         at py4j.GatewayConnection.run(GatewayConnection.java:238)
>         at java.lang.Thread.run(Thread.java:748)
> Caused by: java.io.InvalidClassException: org.apache.spark.broadcast.TorrentBroadcast; local class incompatible: stream classdesc serialVersionUID = 4804550167553929379, local class serialVersionUID = 3291767831129286585
>         at java.io.ObjectStreamClass.initNonProxy(ObjectStreamClass.java:699)
>         at java.io.ObjectInputStream.readNonProxyDesc(ObjectInputStream.java:2003)
>         at java.io.ObjectInputStream.readClassDesc(ObjectInputStream.java:1850)
>         at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2160)
>         at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
>         at java.io.ObjectInputStream.defaultReadFields(ObjectInputStream.java:2405)
>         at java.io.ObjectInputStream.readSerialData(ObjectInputStream.java:2329)
>         at java.io.ObjectInputStream.readOrdinaryObject(ObjectInputStream.java:2187)
>         at java.io.ObjectInputStream.readObject0(ObjectInputStream.java:1667)
>         at java.io.ObjectInputStream.readObject(ObjectInputStream.java:503)
>         at java.io.ObjectInputStream.readObject(ObjectInputStream.java:461)
>         at org.apache.spark.serializer.JavaDeserializationStream.readObject(JavaSerializer.scala:76)
>         at org.apache.spark.serializer.JavaSerializerInstance.deserialize(JavaSerializer.scala:115)
>         at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:407)
>         at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
>         at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
>         ... 1 more