[ https://issues.apache.org/jira/browse/SPARK-43573?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Sean R. Owen resolved SPARK-43573. ---------------------------------- Fix Version/s: 3.5.0 Resolution: Fixed Issue resolved by pull request 41212 [https://github.com/apache/spark/pull/41212] > Make SparkBuilder could config the heap size of test JVM. > --------------------------------------------------------- > > Key: SPARK-43573 > URL: https://issues.apache.org/jira/browse/SPARK-43573 > Project: Spark > Issue Type: Improvement > Components: Build > Affects Versions: 3.5.0 > Reporter: jiaan.geng > Assignee: jiaan.geng > Priority: Major > Fix For: 3.5.0 > > > {code:java} > build/sbt "sql/Test/runMain <this class> --dsdgenDir <path> --location <path> > --scaleFactor 1" > {code} > causes OOM if the scaleFactor is big enough. > {code:java} > [info] 16:43:41.618 ERROR > org.apache.spark.sql.execution.datasources.FileFormatWriter: Job > job_202305181633205732292221634890857_0006 aborted. > [info] 16:43:41.627 ERROR > org.apache.spark.sql.execution.datasources.FileFormatWriter: Job > job_202305181633205732292221634890857_0006 aborted. > [info] 16:43:41.646 WARN > org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Could not delete > file:/home/ubuntu/tpcdsdata/test/catalog_sales/_temporary/0/_temporary/attempt_202305181633205732292221634890857_0006_m_000010_610 > [info] 16:43:41.647 ERROR > org.apache.spark.sql.execution.datasources.FileFormatWriter: Job > job_202305181633205732292221634890857_0006 aborted. > [info] 16:43:41.647 ERROR > org.apache.spark.sql.execution.datasources.FileFormatWriter: Job > job_202305181633205732292221634890857_0006 aborted. > [info] 16:43:41.656 WARN > org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Could not delete > file:/home/ubuntu/tpcdsdata/test/catalog_sales/_temporary/0/_temporary/attempt_202305181633205732292221634890857_0006_m_000014_614 > [info] 16:43:41.656 ERROR > org.apache.spark.sql.execution.datasources.FileFormatWriter: Job > job_202305181633205732292221634890857_0006 aborted. 
> [info] 16:43:41.668 WARN > org.apache.hadoop.mapreduce.lib.output.FileOutputCommitter: Could not delete > file:/home/ubuntu/tpcdsdata/test/catalog_sales/_temporary/0/_temporary/attempt_202305181633205732292221634890857_0006_m_000002_602 > [info] 16:43:41.668 ERROR > org.apache.spark.sql.execution.datasources.FileFormatWriter: Job > job_202305181633205732292221634890857_0006 aborted. > [error] Exception in thread "main" org.apache.spark.SparkException: Job > aborted due to stage failure: Task 13 in stage 6.0 failed 1 times, most > recent failure: Lost task 13.0 in stage 6.0 (TID 613) > (ip-172-31-27-53.cn-northwest-1.compute.internal executor driver): > org.apache.spark.SparkException: [TASK_WRITE_FAILED] Task failed while writing rows to > file:/home/ubuntu/tpcdsdata/test/catalog_sales. > [error] at > org.apache.spark.sql.errors.QueryExecutionErrors$.taskFailedWhileWritingRowsError(QueryExecutionErrors.scala:788) > [error] at > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:420) > [error] at > org.apache.spark.sql.execution.datasources.WriteFilesExec.$anonfun$doExecuteWrite$1(WriteFiles.scala:100) > [error] at > org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:890) > [error] at > org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:890) > [error] at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > [error] at > org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364) > [error] at org.apache.spark.rdd.RDD.iterator(RDD.scala:328) > [error] at > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92) > [error] at > org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161) > [error] at org.apache.spark.scheduler.Task.run(Task.scala:139) > [error] at > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554) > [error] at > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1487) 
> [error] at > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557) > [error] at > java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > [error] at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > [error] at java.lang.Thread.run(Thread.java:750) > [error] Caused by: java.lang.OutOfMemoryError: GC overhead limit exceeded > [error] Driver stacktrace: > [error] at > org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2815) > [error] at > org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2751) > [error] at > org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2750) > [error] at > scala.collection.mutable.ResizableArray.foreach(ResizableArray.scala:62) > [error] at > scala.collection.mutable.ResizableArray.foreach$(ResizableArray.scala:55) > [error] at > scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:49) > [error] at > org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2750) > [error] at > org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1218) > [error] at > org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1218) > [error] at scala.Option.foreach(Option.scala:407) > [error] at > org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1218) > [error] at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3014) > [error] at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2953) > [error] at > org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:2942) > [error] at > org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49) > [error] at > org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:983) > [error] at > 
org.apache.spark.SparkContext.runJob(SparkContext.scala:2285) > [error] at > org.apache.spark.sql.execution.datasources.FileFormatWriter$.$anonfun$executeWrite$4(FileFormatWriter.scala:307) > [error] at > org.apache.spark.sql.execution.datasources.FileFormatWriter$.writeAndCommit(FileFormatWriter.scala:271) > [error] at > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeWrite(FileFormatWriter.scala:304) > [error] at > org.apache.spark.sql.execution.datasources.FileFormatWriter$.write(FileFormatWriter.scala:190) > [error] at > org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelationCommand.run(InsertIntoHadoopFsRelationCommand.scala:190) > [error] at > org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult$lzycompute(commands.scala:113) > [error] at > org.apache.spark.sql.execution.command.DataWritingCommandExec.sideEffectResult(commands.scala:111) > [error] at > org.apache.spark.sql.execution.command.DataWritingCommandExec.executeCollect(commands.scala:125) > [error] at > org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.$anonfun$applyOrElse$1(QueryExecution.scala:98) > [error] at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:118) > [error] at > org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:195) > [error] at > org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:103) > [error] at > org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:825) > [error] at > org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:65) > [error] at > org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:98) > [error] at > org.apache.spark.sql.execution.QueryExecution$$anonfun$eagerlyExecuteCommands$1.applyOrElse(QueryExecution.scala:94) > [error] at > 
org.apache.spark.sql.catalyst.trees.TreeNode.$anonfun$transformDownWithPruning$1(TreeNode.scala:512) > [error] at > org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:104) > [error] at > org.apache.spark.sql.catalyst.trees.TreeNode.transformDownWithPruning(TreeNode.scala:512) > [error] at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.org$apache$spark$sql$catalyst$plans$logical$AnalysisHelper$$super$transformDownWithPruning(LogicalPlan.scala:32) > [error] at > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning(AnalysisHelper.scala:267) > [error] at > org.apache.spark.sql.catalyst.plans.logical.AnalysisHelper.transformDownWithPruning$(AnalysisHelper.scala:263) > [error] at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32) > [error] at > org.apache.spark.sql.catalyst.plans.logical.LogicalPlan.transformDownWithPruning(LogicalPlan.scala:32) > [error] at > org.apache.spark.sql.catalyst.trees.TreeNode.transformDown(TreeNode.scala:488) > [error] at > org.apache.spark.sql.execution.QueryExecution.eagerlyExecuteCommands(QueryExecution.scala:94) > [error] at > org.apache.spark.sql.execution.QueryExecution.commandExecuted$lzycompute(QueryExecution.scala:81) > [error] at > org.apache.spark.sql.execution.QueryExecution.commandExecuted(QueryExecution.scala:79) > [error] at > org.apache.spark.sql.execution.QueryExecution.assertCommandExecuted(QueryExecution.scala:133) > [error] at > org.apache.spark.sql.DataFrameWriter.runCommand(DataFrameWriter.scala:856) > [error] at > org.apache.spark.sql.DataFrameWriter.saveToV1Source(DataFrameWriter.scala:387) > [error] at > org.apache.spark.sql.DataFrameWriter.saveInternal(DataFrameWriter.scala:360) > [error] at > org.apache.spark.sql.DataFrameWriter.save(DataFrameWriter.scala:239) > [error] at > org.apache.spark.sql.TPCDSTables$Table.genData(GenTPCDSData.scala:246) > [error] at > 
org.apache.spark.sql.TPCDSTables.$anonfun$genData$10(GenTPCDSData.scala:276) > [error] at > org.apache.spark.sql.TPCDSTables.$anonfun$genData$10$adapted(GenTPCDSData.scala:273) > [error] at scala.collection.immutable.List.foreach(List.scala:431) > [error] at > org.apache.spark.sql.TPCDSTables.genData(GenTPCDSData.scala:273) > [error] at > org.apache.spark.sql.GenTPCDSData$.main(GenTPCDSData.scala:440) > [error] at org.apache.spark.sql.GenTPCDSData.main(GenTPCDSData.scala) > [error] Caused by: org.apache.spark.SparkException: [TASK_WRITE_FAILED] Task > failed while writing rows to file:/home/ubuntu/tpcdsdata/test/catalog_sales. > [error] at > org.apache.spark.sql.errors.QueryExecutionErrors$.taskFailedWhileWritingRowsError(QueryExecutionErrors.scala:788) > [error] at > org.apache.spark.sql.execution.datasources.FileFormatWriter$.executeTask(FileFormatWriter.scala:420) > [error] at > org.apache.spark.sql.execution.datasources.WriteFilesExec.$anonfun$doExecuteWrite$1(WriteFiles.scala:100) > [error] at > org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2(RDD.scala:890) > [error] at > org.apache.spark.rdd.RDD.$anonfun$mapPartitionsInternal$2$adapted(RDD.scala:890) > [error] at > org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52) > [error] at > org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:364) > [error] at org.apache.spark.rdd.RDD.iterator(RDD.scala:328) > [error] at > org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:92) > [error] at > org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:161) > [error] at org.apache.spark.scheduler.Task.run(Task.scala:139) > [error] at > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:554) > [error] at > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1487) > [error] at > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:557) > [error] at > 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > [error] at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > [error] at java.lang.Thread.run(Thread.java:750) > [error] Caused by: java.lang.OutOfMemoryError: GC overhead limit exceeded > {code} -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org