[ https://issues.apache.org/jira/browse/SPARK-47106?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Dongjoon Hyun updated SPARK-47106: ---------------------------------- Issue Type: Improvement (was: Test) > Plan canonicalization test serializes/deserializes class that is not > serializable > --------------------------------------------------------------------------------- > > Key: SPARK-47106 > URL: https://issues.apache.org/jira/browse/SPARK-47106 > Project: Spark > Issue Type: Improvement > Components: SQL > Affects Versions: 4.0.0 > Reporter: Parth Chandra > Priority: Minor > > The test > {code:java} > test("SPARK-23731 plans should be canonicalizable after being > (de)serialized"){code} > serializes and deserializes > {code:java} > FileSourceScanExec{code} > which is not actually serializable. In particular, > FileSourceScanExec.relation is not serializable. > The test still passes though. > The test below derived from the above shows the issue - > {code:java} > test("verify FileSourceScanExec (de)serialize") { > withSQLConf(SQLConf.USE_V1_SOURCE_LIST.key -> "parquet") { > withTempPath { path => > spark.range(1).write.parquet(path.getAbsolutePath) > val df = spark.read.parquet(path.getAbsolutePath) > val fileSourceScanExec = > df.queryExecution.sparkPlan.collectFirst { case p: > FileSourceScanExec => p }.get > val serializer = SparkEnv.get.serializer.newInstance() > val relation = serializer.serialize(fileSourceScanExec.relation) > assert(relation != null) > val deserialized = > > serializer.deserialize[FileSourceScanExec](serializer.serialize(fileSourceScanExec)) > assert(deserialized.relation != null) > } > } > }{code} > > The test fails with - > {code:java} > (file:/private/var/folders/bz/gg_fqnmj4c17j2c7mdn8ps1m0000gn/T/spark-d534d738-64f1-4eaa-9d9e-8c33374b60f1)) > - field (class: > org.apache.spark.sql.execution.datasources.HadoopFsRelation, name: location, > type: interface org.apache.spark.sql.execution.datasources.FileIndex) > - object (class > org.apache.spark.sql.execution.datasources.HadoopFsRelation, parquet) > java.io.NotSerializableException: > 
org.apache.spark.sql.execution.datasources.InMemoryFileIndex > Serialization stack: > - object not serializable (class: > org.apache.spark.sql.execution.datasources.InMemoryFileIndex, value: > org.apache.spark.sql.execution.datasources.InMemoryFileIndex(file:/private/var/folders/bz/gg_fqnmj4c17j2c7mdn8ps1m0000gn/T/spark-d534d738-64f1-4eaa-9d9e-8c33374b60f1)) > - field (class: > org.apache.spark.sql.execution.datasources.HadoopFsRelation, name: location, > type: interface org.apache.spark.sql.execution.datasources.FileIndex) > - object (class > org.apache.spark.sql.execution.datasources.HadoopFsRelation, parquet) > at > org.apache.spark.serializer.SerializationDebugger$.improveException(SerializationDebugger.scala:41) > at > org.apache.spark.serializer.JavaSerializationStream.writeObject(JavaSerializer.scala:49) > at > org.apache.spark.serializer.JavaSerializerInstance.serialize(JavaSerializer.scala:115) > at > org.apache.spark.sql.execution.SparkPlanSuite.$anonfun$new$11(SparkPlanSuite.scala:54) > at > org.apache.spark.sql.execution.SparkPlanSuite.$anonfun$new$11$adapted(SparkPlanSuite.scala:48) > at > org.apache.spark.sql.catalyst.plans.SQLHelper.withTempPath(SQLHelper.scala:69) > at > org.apache.spark.sql.catalyst.plans.SQLHelper.withTempPath$(SQLHelper.scala:66) > at org.apache.spark.sql.QueryTest.withTempPath(QueryTest.scala:33) > at > org.apache.spark.sql.execution.SparkPlanSuite.$anonfun$new$10(SparkPlanSuite.scala:48) > at > org.apache.spark.sql.catalyst.plans.SQLHelper.withSQLConf(SQLHelper.scala:54) > at > org.apache.spark.sql.catalyst.plans.SQLHelper.withSQLConf$(SQLHelper.scala:38) > at > org.apache.spark.sql.execution.SparkPlanSuite.org$apache$spark$sql$test$SQLTestUtilsBase$$super$withSQLConf(SparkPlanSuite.scala:32) > at > org.apache.spark.sql.test.SQLTestUtilsBase.withSQLConf(SQLTestUtils.scala:266) > at > org.apache.spark.sql.test.SQLTestUtilsBase.withSQLConf$(SQLTestUtils.scala:264) > at > 
org.apache.spark.sql.execution.SparkPlanSuite.withSQLConf(SparkPlanSuite.scala:32) > at > org.apache.spark.sql.execution.SparkPlanSuite.$anonfun$new$9(SparkPlanSuite.scala:48) > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org