[ https://issues.apache.org/jira/browse/SPARK-26710?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
xubo245 updated SPARK-26710: ---------------------------- Description: ImageSchemaSuite and org.apache.spark.ml.source.image.ImageFileFormatSuite fail with errors when run on a local laptop {code:java} execute, tree: Exchange SinglePartition +- *(1) HashAggregate(keys=[], functions=[partial_count(1)], output=[count#17L]) +- *(1) Project +- *(1) Scan ExistingRDD[image#10] org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree: Exchange SinglePartition +- *(1) HashAggregate(keys=[], functions=[partial_count(1)], output=[count#17L]) +- *(1) Project +- *(1) Scan ExistingRDD[image#10] at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:129) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:131) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:155) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) at org.apache.spark.sql.execution.InputAdapter.inputRDD(WholeStageCodegenExec.scala:488) at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs(WholeStageCodegenExec.scala:429) at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs$(WholeStageCodegenExec.scala:428) at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:472) at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:154) at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:719) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:131) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:155) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:247) at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:296) at org.apache.spark.sql.Dataset.$anonfun$count$1(Dataset.scala:2756) at org.apache.spark.sql.Dataset.$anonfun$count$1$adapted(Dataset.scala:2755) at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3291) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:147) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:74) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3287) at org.apache.spark.sql.Dataset.count(Dataset.scala:2755) at org.apache.spark.ml.image.ImageSchemaSuite.$anonfun$new$2(ImageSchemaSuite.scala:53) at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) at org.scalatest.Transformer.apply(Transformer.scala:22) at org.scalatest.Transformer.apply(Transformer.scala:20) at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186) at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:104) at org.scalatest.FunSuiteLike.invokeWithFixture$1(FunSuiteLike.scala:184) at 
org.scalatest.FunSuiteLike.$anonfun$runTest$1(FunSuiteLike.scala:196) at org.scalatest.SuperEngine.runTestImpl(Engine.scala:289) at org.scalatest.FunSuiteLike.runTest(FunSuiteLike.scala:196) at org.scalatest.FunSuiteLike.runTest$(FunSuiteLike.scala:178) at org.scalatest.FunSuite.runTest(FunSuite.scala:1560) at org.scalatest.FunSuiteLike.$anonfun$runTests$1(FunSuiteLike.scala:229) at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:396) at scala.collection.immutable.List.foreach(List.scala:392) at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:384) at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:379) at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:461) at org.scalatest.FunSuiteLike.runTests(FunSuiteLike.scala:229) at org.scalatest.FunSuiteLike.runTests$(FunSuiteLike.scala:228) at org.scalatest.FunSuite.runTests(FunSuite.scala:1560) at org.scalatest.Suite.run(Suite.scala:1147) at org.scalatest.Suite.run$(Suite.scala:1129) at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1560) at org.scalatest.FunSuiteLike.$anonfun$run$1(FunSuiteLike.scala:233) at org.scalatest.SuperEngine.runImpl(Engine.scala:521) at org.scalatest.FunSuiteLike.run(FunSuiteLike.scala:233) at org.scalatest.FunSuiteLike.run$(FunSuiteLike.scala:232) at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:53) at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213) at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210) at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208) at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:53) at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45) at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13(Runner.scala:1340) at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13$adapted(Runner.scala:1334) at scala.collection.immutable.List.foreach(List.scala:392) at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1334) at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24(Runner.scala:1031) at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24$adapted(Runner.scala:1010) at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1500) at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1010) at org.scalatest.tools.Runner$.run(Runner.scala:850) at org.scalatest.tools.Runner.run(Runner.scala) at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:131) at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:28) Caused by: org.apache.hadoop.mapreduce.lib.input.InvalidInputException: Input path does not exist: file:/Users/xubo/Desktop/xubo/git/spark/common/data/mllib/images/origin at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:323) at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.listStatus(FileInputFormat.java:265) at org.apache.spark.input.StreamFileInputFormat.setMinPartitions(PortableDataStream.scala:51) at org.apache.spark.rdd.BinaryFileRDD.getPartitions(BinaryFileRDD.scala:51) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at 
org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.ShuffleDependency.<init>(Dependency.scala:96) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$.prepareShuffleDependency(ShuffleExchangeExec.scala:344) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.prepareShuffleDependency(ShuffleExchangeExec.scala:102) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.$anonfun$doExecute$1(ShuffleExchangeExec.scala:138) at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52) ... 75 more execute, tree: Exchange SinglePartition +- *(1) HashAggregate(keys=[], functions=[partial_count(1)], output=[count#26L]) +- *(1) Project +- *(1) Scan ExistingRDD[image#19] org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree: Exchange SinglePartition +- *(1) HashAggregate(keys=[], functions=[partial_count(1)], output=[count#26L]) +- *(1) Project +- *(1) Scan ExistingRDD[image#19] at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:129) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:131) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:155) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) at org.apache.spark.sql.execution.InputAdapter.inputRDD(WholeStageCodegenExec.scala:488) at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs(WholeStageCodegenExec.scala:429) at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs$(WholeStageCodegenExec.scala:428) at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:472) at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:154) at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:719) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:131) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:155) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:247) at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:296) at 
org.apache.spark.sql.Dataset.$anonfun$count$1(Dataset.scala:2756) at org.apache.spark.sql.Dataset.$anonfun$count$1$adapted(Dataset.scala:2755) at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3291) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:147) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:74) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3287) at org.apache.spark.sql.Dataset.count(Dataset.scala:2755) at org.apache.spark.ml.image.ImageSchemaSuite.$anonfun$new$3(ImageSchemaSuite.scala:70) at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) at org.scalatest.Transformer.apply(Transformer.scala:22) at org.scalatest.Transformer.apply(Transformer.scala:20) at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186) at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:104) at org.scalatest.FunSuiteLike.invokeWithFixture$1(FunSuiteLike.scala:184) at org.scalatest.FunSuiteLike.$anonfun$runTest$1(FunSuiteLike.scala:196) at org.scalatest.SuperEngine.runTestImpl(Engine.scala:289) at org.scalatest.FunSuiteLike.runTest(FunSuiteLike.scala:196) at org.scalatest.FunSuiteLike.runTest$(FunSuiteLike.scala:178) at org.scalatest.FunSuite.runTest(FunSuite.scala:1560) at org.scalatest.FunSuiteLike.$anonfun$runTests$1(FunSuiteLike.scala:229) at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:396) at scala.collection.immutable.List.foreach(List.scala:392) at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:384) at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:379) at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:461) at org.scalatest.FunSuiteLike.runTests(FunSuiteLike.scala:229) at org.scalatest.FunSuiteLike.runTests$(FunSuiteLike.scala:228) at org.scalatest.FunSuite.runTests(FunSuite.scala:1560) at org.scalatest.Suite.run(Suite.scala:1147) at org.scalatest.Suite.run$(Suite.scala:1129) at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1560) at org.scalatest.FunSuiteLike.$anonfun$run$1(FunSuiteLike.scala:233) at org.scalatest.SuperEngine.runImpl(Engine.scala:521) at org.scalatest.FunSuiteLike.run(FunSuiteLike.scala:233) at org.scalatest.FunSuiteLike.run$(FunSuiteLike.scala:232) at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:53) at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213) at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210) at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208) at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:53) at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45) at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13(Runner.scala:1340) at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13$adapted(Runner.scala:1334) at scala.collection.immutable.List.foreach(List.scala:392) at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1334) at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24(Runner.scala:1031) at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24$adapted(Runner.scala:1010) at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1500) 
at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1010) at org.scalatest.tools.Runner$.run(Runner.scala:850) at org.scalatest.tools.Runner.run(Runner.scala) at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:131) at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:28) Caused by: org.apache.hadoop.mapreduce.lib.input.InvalidInputException: Input path does not exist: file:/Users/xubo/Desktop/xubo/git/spark/common/data/mllib/images/origin at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:323) at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.listStatus(FileInputFormat.java:265) at org.apache.spark.input.StreamFileInputFormat.setMinPartitions(PortableDataStream.scala:51) at org.apache.spark.rdd.BinaryFileRDD.getPartitions(BinaryFileRDD.scala:51) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.ShuffleDependency.<init>(Dependency.scala:96) at org.apache.spark.rdd.ShuffledRDD.getDependencies(ShuffledRDD.scala:87) at org.apache.spark.rdd.RDD.$anonfun$dependencies$2(RDD.scala:243) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.dependencies(RDD.scala:241) at org.apache.spark.rdd.ShuffledRDD.getPreferredLocations(ShuffledRDD.scala:98) at org.apache.spark.rdd.RDD.$anonfun$preferredLocations$2(RDD.scala:278) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.preferredLocations(RDD.scala:278) at org.apache.spark.scheduler.DAGScheduler.getPreferredLocsInternal(DAGScheduler.scala:2045) at org.apache.spark.scheduler.DAGScheduler.getPreferredLocs(DAGScheduler.scala:2019) at org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1743) at org.apache.spark.rdd.DefaultPartitionCoalescer.currPrefLocs(CoalescedRDD.scala:178) at org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.$anonfun$getAllPrefLocs$1(CoalescedRDD.scala:196) at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36) at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33) at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198) at org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.getAllPrefLocs(CoalescedRDD.scala:195) at org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.<init>(CoalescedRDD.scala:188) at org.apache.spark.rdd.DefaultPartitionCoalescer.coalesce(CoalescedRDD.scala:391) at org.apache.spark.rdd.CoalescedRDD.getPartitions(CoalescedRDD.scala:91) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at 
scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.ShuffleDependency.<init>(Dependency.scala:96) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$.prepareShuffleDependency(ShuffleExchangeExec.scala:344) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.prepareShuffleDependency(ShuffleExchangeExec.scala:102) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.$anonfun$doExecute$1(ShuffleExchangeExec.scala:138) at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52) ... 75 more execute, tree: Exchange SinglePartition +- *(1) HashAggregate(keys=[], functions=[partial_count(1)], output=[count#35L]) +- *(1) Project +- *(1) Scan ExistingRDD[image#28] org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree: Exchange SinglePartition +- *(1) HashAggregate(keys=[], functions=[partial_count(1)], output=[count#35L]) +- *(1) Project +- *(1) Scan ExistingRDD[image#28] at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:129) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:131) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:155) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) at org.apache.spark.sql.execution.InputAdapter.inputRDD(WholeStageCodegenExec.scala:488) at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs(WholeStageCodegenExec.scala:429) at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs$(WholeStageCodegenExec.scala:428) at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:472) at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:154) at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:719) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:131) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:155) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:247) at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:296) at org.apache.spark.sql.Dataset.$anonfun$count$1(Dataset.scala:2756) at 
org.apache.spark.sql.Dataset.$anonfun$count$1$adapted(Dataset.scala:2755) at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3291) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:147) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:74) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3287) at org.apache.spark.sql.Dataset.count(Dataset.scala:2755) at org.apache.spark.ml.image.ImageSchemaSuite.$anonfun$new$4(ImageSchemaSuite.scala:75) at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) at org.scalatest.Transformer.apply(Transformer.scala:22) at org.scalatest.Transformer.apply(Transformer.scala:20) at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186) {code} was: ImageSchemaSuite has some errors when running it in local laptop {code:java} execute, tree: Exchange SinglePartition +- *(1) HashAggregate(keys=[], functions=[partial_count(1)], output=[count#17L]) +- *(1) Project +- *(1) Scan ExistingRDD[image#10] org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree: Exchange SinglePartition +- *(1) HashAggregate(keys=[], functions=[partial_count(1)], output=[count#17L]) +- *(1) Project +- *(1) Scan ExistingRDD[image#10] at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:129) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:131) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:155) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) at org.apache.spark.sql.execution.InputAdapter.inputRDD(WholeStageCodegenExec.scala:488) at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs(WholeStageCodegenExec.scala:429) at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs$(WholeStageCodegenExec.scala:428) at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:472) at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:154) at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:719) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:131) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:155) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:247) at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:296) at org.apache.spark.sql.Dataset.$anonfun$count$1(Dataset.scala:2756) at org.apache.spark.sql.Dataset.$anonfun$count$1$adapted(Dataset.scala:2755) at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3291) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87) at 
org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:147) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:74) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3287) at org.apache.spark.sql.Dataset.count(Dataset.scala:2755) at org.apache.spark.ml.image.ImageSchemaSuite.$anonfun$new$2(ImageSchemaSuite.scala:53) at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) at org.scalatest.Transformer.apply(Transformer.scala:22) at org.scalatest.Transformer.apply(Transformer.scala:20) at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186) at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:104) at org.scalatest.FunSuiteLike.invokeWithFixture$1(FunSuiteLike.scala:184) at org.scalatest.FunSuiteLike.$anonfun$runTest$1(FunSuiteLike.scala:196) at org.scalatest.SuperEngine.runTestImpl(Engine.scala:289) at org.scalatest.FunSuiteLike.runTest(FunSuiteLike.scala:196) at org.scalatest.FunSuiteLike.runTest$(FunSuiteLike.scala:178) at org.scalatest.FunSuite.runTest(FunSuite.scala:1560) at org.scalatest.FunSuiteLike.$anonfun$runTests$1(FunSuiteLike.scala:229) at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:396) at scala.collection.immutable.List.foreach(List.scala:392) at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:384) at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:379) at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:461) at org.scalatest.FunSuiteLike.runTests(FunSuiteLike.scala:229) at org.scalatest.FunSuiteLike.runTests$(FunSuiteLike.scala:228) at org.scalatest.FunSuite.runTests(FunSuite.scala:1560) at org.scalatest.Suite.run(Suite.scala:1147) at org.scalatest.Suite.run$(Suite.scala:1129) at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1560) at org.scalatest.FunSuiteLike.$anonfun$run$1(FunSuiteLike.scala:233) at org.scalatest.SuperEngine.runImpl(Engine.scala:521) at org.scalatest.FunSuiteLike.run(FunSuiteLike.scala:233) at org.scalatest.FunSuiteLike.run$(FunSuiteLike.scala:232) at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:53) at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213) at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210) at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208) at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:53) at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45) at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13(Runner.scala:1340) at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13$adapted(Runner.scala:1334) at scala.collection.immutable.List.foreach(List.scala:392) at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1334) at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24(Runner.scala:1031) at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24$adapted(Runner.scala:1010) at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1500) at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1010) at org.scalatest.tools.Runner$.run(Runner.scala:850) at org.scalatest.tools.Runner.run(Runner.scala) at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:131) at 
org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:28) Caused by: org.apache.hadoop.mapreduce.lib.input.InvalidInputException: Input path does not exist: file:/Users/xubo/Desktop/xubo/git/spark/common/data/mllib/images/origin at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:323) at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.listStatus(FileInputFormat.java:265) at org.apache.spark.input.StreamFileInputFormat.setMinPartitions(PortableDataStream.scala:51) at org.apache.spark.rdd.BinaryFileRDD.getPartitions(BinaryFileRDD.scala:51) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.ShuffleDependency.<init>(Dependency.scala:96) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$.prepareShuffleDependency(ShuffleExchangeExec.scala:344) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.prepareShuffleDependency(ShuffleExchangeExec.scala:102) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.$anonfun$doExecute$1(ShuffleExchangeExec.scala:138) at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52) ... 
75 more execute, tree: Exchange SinglePartition +- *(1) HashAggregate(keys=[], functions=[partial_count(1)], output=[count#26L]) +- *(1) Project +- *(1) Scan ExistingRDD[image#19] org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree: Exchange SinglePartition +- *(1) HashAggregate(keys=[], functions=[partial_count(1)], output=[count#26L]) +- *(1) Project +- *(1) Scan ExistingRDD[image#19] at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:129) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:131) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:155) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) at org.apache.spark.sql.execution.InputAdapter.inputRDD(WholeStageCodegenExec.scala:488) at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs(WholeStageCodegenExec.scala:429) at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs$(WholeStageCodegenExec.scala:428) at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:472) at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:154) at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:719) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:131) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:155) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:247) at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:296) at org.apache.spark.sql.Dataset.$anonfun$count$1(Dataset.scala:2756) at org.apache.spark.sql.Dataset.$anonfun$count$1$adapted(Dataset.scala:2755) at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3291) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:147) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:74) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3287) at org.apache.spark.sql.Dataset.count(Dataset.scala:2755) at org.apache.spark.ml.image.ImageSchemaSuite.$anonfun$new$3(ImageSchemaSuite.scala:70) at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) at org.scalatest.Transformer.apply(Transformer.scala:22) at org.scalatest.Transformer.apply(Transformer.scala:20) at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186) at org.apache.spark.SparkFunSuite.withFixture(SparkFunSuite.scala:104) at org.scalatest.FunSuiteLike.invokeWithFixture$1(FunSuiteLike.scala:184) at org.scalatest.FunSuiteLike.$anonfun$runTest$1(FunSuiteLike.scala:196) at org.scalatest.SuperEngine.runTestImpl(Engine.scala:289) at org.scalatest.FunSuiteLike.runTest(FunSuiteLike.scala:196) at 
org.scalatest.FunSuiteLike.runTest$(FunSuiteLike.scala:178) at org.scalatest.FunSuite.runTest(FunSuite.scala:1560) at org.scalatest.FunSuiteLike.$anonfun$runTests$1(FunSuiteLike.scala:229) at org.scalatest.SuperEngine.$anonfun$runTestsInBranch$1(Engine.scala:396) at scala.collection.immutable.List.foreach(List.scala:392) at org.scalatest.SuperEngine.traverseSubNodes$1(Engine.scala:384) at org.scalatest.SuperEngine.runTestsInBranch(Engine.scala:379) at org.scalatest.SuperEngine.runTestsImpl(Engine.scala:461) at org.scalatest.FunSuiteLike.runTests(FunSuiteLike.scala:229) at org.scalatest.FunSuiteLike.runTests$(FunSuiteLike.scala:228) at org.scalatest.FunSuite.runTests(FunSuite.scala:1560) at org.scalatest.Suite.run(Suite.scala:1147) at org.scalatest.Suite.run$(Suite.scala:1129) at org.scalatest.FunSuite.org$scalatest$FunSuiteLike$$super$run(FunSuite.scala:1560) at org.scalatest.FunSuiteLike.$anonfun$run$1(FunSuiteLike.scala:233) at org.scalatest.SuperEngine.runImpl(Engine.scala:521) at org.scalatest.FunSuiteLike.run(FunSuiteLike.scala:233) at org.scalatest.FunSuiteLike.run$(FunSuiteLike.scala:232) at org.apache.spark.SparkFunSuite.org$scalatest$BeforeAndAfterAll$$super$run(SparkFunSuite.scala:53) at org.scalatest.BeforeAndAfterAll.liftedTree1$1(BeforeAndAfterAll.scala:213) at org.scalatest.BeforeAndAfterAll.run(BeforeAndAfterAll.scala:210) at org.scalatest.BeforeAndAfterAll.run$(BeforeAndAfterAll.scala:208) at org.apache.spark.SparkFunSuite.run(SparkFunSuite.scala:53) at org.scalatest.tools.SuiteRunner.run(SuiteRunner.scala:45) at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13(Runner.scala:1340) at org.scalatest.tools.Runner$.$anonfun$doRunRunRunDaDoRunRun$13$adapted(Runner.scala:1334) at scala.collection.immutable.List.foreach(List.scala:392) at org.scalatest.tools.Runner$.doRunRunRunDaDoRunRun(Runner.scala:1334) at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24(Runner.scala:1031) at org.scalatest.tools.Runner$.$anonfun$runOptionallyWithPassFailReporter$24$adapted(Runner.scala:1010) at org.scalatest.tools.Runner$.withClassLoaderAndDispatchReporter(Runner.scala:1500) at org.scalatest.tools.Runner$.runOptionallyWithPassFailReporter(Runner.scala:1010) at org.scalatest.tools.Runner$.run(Runner.scala:850) at org.scalatest.tools.Runner.run(Runner.scala) at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.runScalaTest2(ScalaTestRunner.java:131) at org.jetbrains.plugins.scala.testingSupport.scalaTest.ScalaTestRunner.main(ScalaTestRunner.java:28) Caused by: org.apache.hadoop.mapreduce.lib.input.InvalidInputException: Input path does not exist: file:/Users/xubo/Desktop/xubo/git/spark/common/data/mllib/images/origin at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.singleThreadedListStatus(FileInputFormat.java:323) at org.apache.hadoop.mapreduce.lib.input.FileInputFormat.listStatus(FileInputFormat.java:265) at org.apache.spark.input.StreamFileInputFormat.setMinPartitions(PortableDataStream.scala:51) at org.apache.spark.rdd.BinaryFileRDD.getPartitions(BinaryFileRDD.scala:51) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at 
org.apache.spark.ShuffleDependency.<init>(Dependency.scala:96) at org.apache.spark.rdd.ShuffledRDD.getDependencies(ShuffledRDD.scala:87) at org.apache.spark.rdd.RDD.$anonfun$dependencies$2(RDD.scala:243) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.dependencies(RDD.scala:241) at org.apache.spark.rdd.ShuffledRDD.getPreferredLocations(ShuffledRDD.scala:98) at org.apache.spark.rdd.RDD.$anonfun$preferredLocations$2(RDD.scala:278) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.preferredLocations(RDD.scala:278) at org.apache.spark.scheduler.DAGScheduler.getPreferredLocsInternal(DAGScheduler.scala:2045) at org.apache.spark.scheduler.DAGScheduler.getPreferredLocs(DAGScheduler.scala:2019) at org.apache.spark.SparkContext.getPreferredLocs(SparkContext.scala:1743) at org.apache.spark.rdd.DefaultPartitionCoalescer.currPrefLocs(CoalescedRDD.scala:178) at org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.$anonfun$getAllPrefLocs$1(CoalescedRDD.scala:196) at scala.collection.IndexedSeqOptimized.foreach(IndexedSeqOptimized.scala:36) at scala.collection.IndexedSeqOptimized.foreach$(IndexedSeqOptimized.scala:33) at scala.collection.mutable.ArrayOps$ofRef.foreach(ArrayOps.scala:198) at org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.getAllPrefLocs(CoalescedRDD.scala:195) at org.apache.spark.rdd.DefaultPartitionCoalescer$PartitionLocations.<init>(CoalescedRDD.scala:188) at org.apache.spark.rdd.DefaultPartitionCoalescer.coalesce(CoalescedRDD.scala:391) at org.apache.spark.rdd.CoalescedRDD.getPartitions(CoalescedRDD.scala:91) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.rdd.MapPartitionsRDD.getPartitions(MapPartitionsRDD.scala:49) at org.apache.spark.rdd.RDD.$anonfun$partitions$2(RDD.scala:256) at scala.Option.getOrElse(Option.scala:138) at org.apache.spark.rdd.RDD.partitions(RDD.scala:254) at org.apache.spark.ShuffleDependency.<init>(Dependency.scala:96) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec$.prepareShuffleDependency(ShuffleExchangeExec.scala:344) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.prepareShuffleDependency(ShuffleExchangeExec.scala:102) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.$anonfun$doExecute$1(ShuffleExchangeExec.scala:138) at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:52) ... 
75 more execute, tree: Exchange SinglePartition +- *(1) HashAggregate(keys=[], functions=[partial_count(1)], output=[count#35L]) +- *(1) Project +- *(1) Scan ExistingRDD[image#28] org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree: Exchange SinglePartition +- *(1) HashAggregate(keys=[], functions=[partial_count(1)], output=[count#35L]) +- *(1) Project +- *(1) Scan ExistingRDD[image#28] at org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56) at org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:129) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:131) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:155) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) at org.apache.spark.sql.execution.InputAdapter.inputRDD(WholeStageCodegenExec.scala:488) at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs(WholeStageCodegenExec.scala:429) at org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs$(WholeStageCodegenExec.scala:428) at org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:472) at org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:154) at org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:719) at org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:131) at org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:155) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) at org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:247) at org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:296) at org.apache.spark.sql.Dataset.$anonfun$count$1(Dataset.scala:2756) at org.apache.spark.sql.Dataset.$anonfun$count$1$adapted(Dataset.scala:2755) at org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3291) at org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87) at org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:147) at org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:74) at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3287) at org.apache.spark.sql.Dataset.count(Dataset.scala:2755) at org.apache.spark.ml.image.ImageSchemaSuite.$anonfun$new$4(ImageSchemaSuite.scala:75) at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) at org.scalatest.Transformer.apply(Transformer.scala:22) at org.scalatest.Transformer.apply(Transformer.scala:20) at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186) {code} > ImageSchemaSuite has some errors when running it in local laptop > ---------------------------------------------------------------- > > Key: SPARK-26710 > URL: https://issues.apache.org/jira/browse/SPARK-26710 > Project: Spark > Issue Type: Bug > Components: Tests > Affects Versions: 2.4.0 > Reporter: xubo245 > Priority: Major > > 
75 more > execute, tree: > Exchange SinglePartition > +- *(1) HashAggregate(keys=[], functions=[partial_count(1)], > output=[count#35L]) > +- *(1) Project > +- *(1) Scan ExistingRDD[image#28] > org.apache.spark.sql.catalyst.errors.package$TreeNodeException: execute, tree: > Exchange SinglePartition > +- *(1) HashAggregate(keys=[], functions=[partial_count(1)], > output=[count#35L]) > +- *(1) Project > +- *(1) Scan ExistingRDD[image#28] > at > org.apache.spark.sql.catalyst.errors.package$.attachTree(package.scala:56) > at > org.apache.spark.sql.execution.exchange.ShuffleExchangeExec.doExecute(ShuffleExchangeExec.scala:129) > at > org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:131) > at > org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:155) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) > at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) > at > org.apache.spark.sql.execution.InputAdapter.inputRDD(WholeStageCodegenExec.scala:488) > at > org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs(WholeStageCodegenExec.scala:429) > at > org.apache.spark.sql.execution.InputRDDCodegen.inputRDDs$(WholeStageCodegenExec.scala:428) > at > org.apache.spark.sql.execution.InputAdapter.inputRDDs(WholeStageCodegenExec.scala:472) > at > org.apache.spark.sql.execution.aggregate.HashAggregateExec.inputRDDs(HashAggregateExec.scala:154) > at > org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:719) > at > org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:131) > at > org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:155) > at > org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) > at > org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:152) > at org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:127) > at > org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:247) > at > org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:296) > at org.apache.spark.sql.Dataset.$anonfun$count$1(Dataset.scala:2756) > at > org.apache.spark.sql.Dataset.$anonfun$count$1$adapted(Dataset.scala:2755) > at > org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:3291) > at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:87) > at > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:147) > at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:74) > at org.apache.spark.sql.Dataset.withAction(Dataset.scala:3287) > at org.apache.spark.sql.Dataset.count(Dataset.scala:2755) > at > org.apache.spark.ml.image.ImageSchemaSuite.$anonfun$new$4(ImageSchemaSuite.scala:75) > at org.scalatest.OutcomeOf.outcomeOf(OutcomeOf.scala:85) > at org.scalatest.OutcomeOf.outcomeOf$(OutcomeOf.scala:83) > at org.scalatest.OutcomeOf$.outcomeOf(OutcomeOf.scala:104) > at org.scalatest.Transformer.apply(Transformer.scala:22) > at org.scalatest.Transformer.apply(Transformer.scala:20) > at org.scalatest.FunSuiteLike$$anon$1.apply(FunSuiteLike.scala:186) > {code} -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For 
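The nested "Caused by" above points at the actual problem: the suites load the test images through a repository-relative path, so when they are launched from an IDE whose working directory is a module directory (here {{common/}}) rather than the repository root, the path resolves to {{.../spark/common/data/mllib/images/origin}}, which does not exist, and Hadoop's FileInputFormat throws InvalidInputException before the Exchange can run. The sketch below only illustrates that path-resolution issue; it is not the suites' actual loading code, and the SPARK_HOME environment variable used to anchor the path is an assumption for this example.

{code:java}
// Hypothetical repro/workaround sketch for the path-resolution failure above.
// Assumption: SPARK_HOME points at the Spark source checkout; the suites
// themselves may resolve the directory differently.
import java.nio.file.Paths
import org.apache.spark.sql.SparkSession

object ImagePathCheck {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .master("local[2]")
      .appName("SPARK-26710 path sketch")
      .getOrCreate()

    // Fails with InvalidInputException when the working directory is not the
    // repo root (e.g. an IDE runs the suite from common/ or mllib/):
    // spark.read.format("image").load("data/mllib/images/origin").count()

    // Workaround sketch: resolve the image directory against an explicit root
    // instead of relying on the process working directory.
    val sparkHome = sys.env.getOrElse("SPARK_HOME", ".")
    val imageDir = Paths.get(sparkHome, "data", "mllib", "images", "origin")
      .toAbsolutePath.toString
    val count = spark.read.format("image").load(imageDir).count()
    println(s"loaded $count images from $imageDir")

    spark.stop()
  }
}
{code}

Equivalently, running the suites from the repository root (or pointing the IDE run configuration's working directory there) avoids the error, which matches the observation that the tests only fail in local laptop runs.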