[ https://issues.apache.org/jira/browse/SPARK-33820?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Terry Moschou updated SPARK-33820: ---------------------------------- Issue Type: Bug (was: Improvement) > java.lang.ClassCastException: com.example.A cannot be cast to com.example.A > when spark.executor.userClassPathFirst is true > -------------------------------------------------------------------------------------------------------------------------- > > Key: SPARK-33820 > URL: https://issues.apache.org/jira/browse/SPARK-33820 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 2.4.7, 3.0.1 > Environment: {noformat} > $ java -version > openjdk version "1.8.0_212" > OpenJDK Runtime Environment (AdoptOpenJDK)(build 1.8.0_212-b04) > OpenJDK 64-Bit Server VM (AdoptOpenJDK)(build 25.212-b04, mixed mode) > $ mvn -version > Apache Maven 3.6.3 (cecedd343002696d0abb50b32b541b8a6ba2883f) > Maven home: /usr/local/Cellar/maven/3.6.3_1/libexec > Java version: 1.8.0_212, vendor: AdoptOpenJDK, runtime: > /Library/Java/JavaVirtualMachines/adoptopenjdk-8.jdk/Contents/Home/jre > Default locale: en_AU, platform encoding: UTF-8 > OS name: "mac os x", version: "10.15.7", arch: "x86_64", family: "Mac" > {noformat} > Reporter: Terry Moschou > Priority: Major > > Specifying > {{--conf spark.executor.userClassPathFirst=true}} > as an option to {{spark-submit}} causes a {{java.lang.ClassCastException}} to > be thrown when trying to cast a class to itself, that would otherwise not > have been thrown if the option was not specified. > Minimal test case to reproduce the error is below. 
> {{com/example/App.scala}} > {code:scala} > package com.example > import org.apache.spark.sql.{Dataset, SaveMode, SparkSession} > case class A(id: String, value: Long) > object App { > def main(args: Array[String]): Unit = { > val spark = SparkSession.builder().getOrCreate() > import spark.implicits._ > val ds: Dataset[A] = spark > .range(5) > .map { v => A(s"$v", v) } > ds.write.format("json").mode(SaveMode.Overwrite).save("out.json") > spark.close() > } > } > {code} > {{build.xml}} > {code:xml} > <?xml version="1.0" encoding="UTF-8"?> > <project xmlns="http://maven.apache.org/POM/4.0.0" > xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" > xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 > http://maven.apache.org/xsd/maven-4.0.0.xsd"> > <modelVersion>4.0.0</modelVersion> > <groupId>com.example</groupId> > <artifactId>test-bug</artifactId> > <version>1.0-SNAPSHOT</version> > <properties> > <version.java>1.8</version.java> > <maven.compiler.source>${version.java}</maven.compiler.source> > <maven.compiler.target>${version.java}</maven.compiler.target> > <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> > <scala.binary.version>2.12</scala.binary.version> > <scala.version>2.12.12</scala.version> > </properties> > <dependencies> > <dependency> > <groupId>org.apache.spark</groupId> > <artifactId>spark-sql_${scala.binary.version}</artifactId> > <version>3.0.1</version> > <scope>provided</scope> > </dependency> > </dependencies> > <build> > <pluginManagement> > <plugins> > <plugin> > <groupId>net.alchim31.maven</groupId> > <artifactId>scala-maven-plugin</artifactId> > <version>4.3.1</version> > <executions> > <execution> > <goals> > <goal>add-source</goal> > <goal>compile</goal> > <goal>testCompile</goal> > </goals> > </execution> > </executions> > <configuration> > <scalaVersion>${scala.version}</scalaVersion> > <recompileMode>incremental</recompileMode> > <args> > <arg>-unchecked</arg> > <arg>-deprecation</arg> > <arg>-feature</arg> > 
<arg>-explaintypes</arg> > <arg>-Yno-adapted-args</arg> > <arg>-Ypartial-unification</arg> > <arg>-language:higherKinds</arg> > </args> > <javacArgs> > <javacArg>-source</javacArg> > <javacArg>${java.version}</javacArg> > <javacArg>-target</javacArg> > <javacArg>${java.version}</javacArg> > <javacArg>-Xlint:all,-serial,-path,-try</javacArg> > </javacArgs> > </configuration> > </plugin> > </plugins> > </pluginManagement> > <plugins> > <plugin> > <groupId>net.alchim31.maven</groupId> > <artifactId>scala-maven-plugin</artifactId> > </plugin> > </plugins> > </build> > </project> > {code} > Using a fresh unzip of the spark distribution spark-3.0.1-bin-hadoop3.2 > {code:bash} > mvn clean package > SPARK_HOME=$HOME/dist/spark-3.0.1-bin-hadoop3.2 > $SPARK_HOME/bin/spark-submit \ > --class com.example.App \ > --conf spark.executor.userClassPathFirst=true \ > --master 'local[*]' \ > target/test-bug-1.0-SNAPSHOT.jar > {code} > The following exception is thrown > {noformat} > 20/12/17 14:31:22 ERROR Executor: Exception in task 15.0 in stage 0.0 (TID 15) > java.lang.ClassCastException: com.example.A cannot be cast to com.example.A > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.mapelements_doConsume_0$(Unknown > Source) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.deserializetoobject_doConsume_0$(Unknown > Source) > at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage1.processNext(Unknown > Source) > at > org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) > at > org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:729) > at > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:260) > at > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$write$15(FileFormatWriter.scala:205) > at 
org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:90) > at org.apache.spark.scheduler.Task.run(Task.scala:127) > at > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:446) > at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1377) > at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:449) > at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > at java.lang.Thread.run(Thread.java:748) > {noformat} > This error also happens against Spark 2.4.7 (Scala 2.12). -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org For additional commands, e-mail: issues-help@spark.apache.org