hsiang-c opened a new issue, #2121
URL: https://github.com/apache/datafusion-comet/issues/2121
### Describe the bug

```shell
> Task :iceberg-spark:iceberg-spark-3.5_2.13:test

TestStoragePartitionedJoins > testJoinsWithBucketingOnBinaryColumn() > catalogName = testhadoop, implementation = org.apache.iceberg.spark.SparkCatalog, config = {type=hadoop, cache-enabled=false}, planningMode = LOCAL FAILED
    org.apache.spark.SparkException: Job aborted due to stage failure: Task 1 in stage 134.0 failed 1 times, most recent failure: Lost task 1.0 in stage 134.0 (TID 3081) (localhost executor driver): org.apache.comet.CometNativeException: overflow
        at comet::errors::init::{{closure}}(__internal__:0)
        at std::panicking::rust_panic_with_hook(__internal__:0)
        at std::panicking::begin_panic_handler::{{closure}}(__internal__:0)
        at std::sys::backtrace::__rust_end_short_backtrace(__internal__:0)
        at __rustc::rust_begin_unwind(__internal__:0)
        at core::panicking::panic_fmt(__internal__:0)
        at core::option::expect_failed(__internal__:0)
        at arrow_select::take::take_bytes(__internal__:0)
        at arrow_select::take::take_impl(__internal__:0)
        at arrow_select::take::take(__internal__:0)
        at arrow_select::take::take_arrays(__internal__:0)
        at datafusion_physical_plan::sorts::sort::sort_batch(__internal__:0)
        at <datafusion_physical_plan::stream::RecordBatchStreamAdapter<S> as futures_core::stream::Stream>::poll_next(__internal__:0)
        at <datafusion_physical_plan::stream::RecordBatchStreamAdapter<S> as futures_core::stream::Stream>::poll_next(__internal__:0)
        at comet::execution::jni_api::Java_org_apache_comet_Native_executePlan::{{closure}}::{{closure}}(__internal__:0)
        at Java_org_apache_comet_Native_executePlan(__internal__:0)
        at <unknown>(__internal__:0)
        at org.apache.comet.Native.executePlan(Native Method)
        at org.apache.comet.CometExecIterator.$anonfun$getNextBatch$2(CometExecIterator.scala:155)
        at org.apache.comet.CometExecIterator.$anonfun$getNextBatch$2$adapted(CometExecIterator.scala:154)
        at org.apache.comet.vector.NativeUtil.getNextBatch(NativeUtil.scala:157)
        at org.apache.comet.CometExecIterator.$anonfun$getNextBatch$1(CometExecIterator.scala:154)
        at org.apache.comet.Tracing$.withTrace(Tracing.scala:31)
        at org.apache.comet.CometExecIterator.getNextBatch(CometExecIterator.scala:152)
        at org.apache.comet.CometExecIterator.hasNext(CometExecIterator.scala:203)
        at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.cometcolumnartorow_nextBatch_0$(Unknown Source)
        at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
        at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
        at org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:43)
        at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.smj_findNextJoinRows_0$(Unknown Source)
        at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.processNext(Unknown Source)
        at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
        at org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:43)
        at scala.collection.Iterator$$anon$9.hasNext(Iterator.scala:583)
        at scala.collection.Iterator$$anon$9.hasNext(Iterator.scala:583)
        at org.apache.spark.util.random.SamplingUtils$.reservoirSampleAndCount(SamplingUtils.scala:41)
        at org.apache.spark.RangePartitioner$.$anonfun$sketch$1(Partitioner.scala:322)
        at org.apache.spark.RangePartitioner$.$anonfun$sketch$1$adapted(Partitioner.scala:320)
        at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:910)
        at org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:910)
        at org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
        at org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367)
        at org.apache.spark.rdd.RDD.iterator(RDD.scala:331)
        at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
        at org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
        at org.apache.spark.scheduler.Task.run(Task.scala:141)
        at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:621)
        at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
        at org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
        at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
        at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:624)
        at java.base/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
        at java.base/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
        at java.base/java.lang.Thread.run(Thread.java:829)

    Driver stacktrace:
        at app//org.apache.spark.scheduler.DAGScheduler.failJobAndIndependentStages(DAGScheduler.scala:2898)
        at app//org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2(DAGScheduler.scala:2834)
        at app//org.apache.spark.scheduler.DAGScheduler.$anonfun$abortStage$2$adapted(DAGScheduler.scala:2833)
        at app//scala.collection.immutable.List.foreach(List.scala:334)
        at app//org.apache.spark.scheduler.DAGScheduler.abortStage(DAGScheduler.scala:2833)
        at app//org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1(DAGScheduler.scala:1253)
        at app//org.apache.spark.scheduler.DAGScheduler.$anonfun$handleTaskSetFailed$1$adapted(DAGScheduler.scala:1253)
        at app//scala.Option.foreach(Option.scala:437)
        at app//org.apache.spark.scheduler.DAGScheduler.handleTaskSetFailed(DAGScheduler.scala:1253)
        at app//org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.doOnReceive(DAGScheduler.scala:3102)
        at app//org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3036)
        at app//org.apache.spark.scheduler.DAGSchedulerEventProcessLoop.onReceive(DAGScheduler.scala:3025)
        at app//org.apache.spark.util.EventLoop$$anon$1.run(EventLoop.scala:49)
        at app//org.apache.spark.scheduler.DAGScheduler.runJob(DAGScheduler.scala:995)
        at app//org.apache.spark.SparkContext.runJob(SparkContext.scala:2393)
        at app//org.apache.spark.SparkContext.runJob(SparkContext.scala:2414)
        at app//org.apache.spark.SparkContext.runJob(SparkContext.scala:2433)
        at app//org.apache.spark.SparkContext.runJob(SparkContext.scala:2458)
        at app//org.apache.spark.rdd.RDD.$anonfun$collect$1(RDD.scala:1049)
        at app//org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at app//org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112)
        at app//org.apache.spark.rdd.RDD.withScope(RDD.scala:410)
        at app//org.apache.spark.rdd.RDD.collect(RDD.scala:1048)
        at app//org.apache.spark.RangePartitioner$.sketch(Partitioner.scala:320)
        at app//org.apache.spark.RangePartitioner.<init>(Partitioner.scala:187)
        at app//org.apache.spark.sql.comet.execution.shuffle.CometShuffleExchangeExec$.prepareJVMShuffleDependency(CometShuffleExchangeExec.scala:317)
        at app//org.apache.spark.sql.comet.execution.shuffle.CometShuffleExchangeExec.shuffleDependency$lzycompute(CometShuffleExchangeExec.scala:153)
        at app//org.apache.spark.sql.comet.execution.shuffle.CometShuffleExchangeExec.shuffleDependency(CometShuffleExchangeExec.scala:132)
        at app//org.apache.spark.sql.comet.execution.shuffle.CometShuffleExchangeExec.doExecuteColumnar(CometShuffleExchangeExec.scala:186)
        at app//org.apache.spark.sql.execution.SparkPlan.$anonfun$executeColumnar$1(SparkPlan.scala:222)
        at app//org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:246)
        at app//org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at app//org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:243)
        at app//org.apache.spark.sql.execution.SparkPlan.executeColumnar(SparkPlan.scala:218)
        at app//org.apache.spark.sql.comet.CometNativeExec.doExecuteColumnar(operators.scala:265)
        at app//org.apache.spark.sql.execution.SparkPlan.$anonfun$executeColumnar$1(SparkPlan.scala:222)
        at app//org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:246)
        at app//org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at app//org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:243)
        at app//org.apache.spark.sql.execution.SparkPlan.executeColumnar(SparkPlan.scala:218)
        at app//org.apache.spark.sql.execution.InputAdapter.doExecuteColumnar(WholeStageCodegenExec.scala:521)
        at app//org.apache.spark.sql.execution.SparkPlan.$anonfun$executeColumnar$1(SparkPlan.scala:222)
        at app//org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:246)
        at app//org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at app//org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:243)
        at app//org.apache.spark.sql.execution.SparkPlan.executeColumnar(SparkPlan.scala:218)
        at app//org.apache.spark.sql.comet.CometColumnarToRowExec.inputRDDs(CometColumnarToRowExec.scala:306)
        at app//org.apache.spark.sql.execution.WholeStageCodegenExec.doExecute(WholeStageCodegenExec.scala:751)
        at app//org.apache.spark.sql.execution.SparkPlan.$anonfun$execute$1(SparkPlan.scala:195)
        at app//org.apache.spark.sql.execution.SparkPlan.$anonfun$executeQuery$1(SparkPlan.scala:246)
        at app//org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151)
        at app//org.apache.spark.sql.execution.SparkPlan.executeQuery(SparkPlan.scala:243)
        at app//org.apache.spark.sql.execution.SparkPlan.execute(SparkPlan.scala:191)
        at app//org.apache.spark.sql.execution.SparkPlan.getByteArrayRdd(SparkPlan.scala:364)
        at app//org.apache.spark.sql.execution.SparkPlan.executeCollect(SparkPlan.scala:445)
        at app//org.apache.spark.sql.Dataset.collectFromPlan(Dataset.scala:4333)
        at app//org.apache.spark.sql.Dataset.$anonfun$collectAsList$1(Dataset.scala:3587)
        at app//org.apache.spark.sql.Dataset.$anonfun$withAction$2(Dataset.scala:4323)
        at app//org.apache.spark.sql.execution.QueryExecution$.withInternalError(QueryExecution.scala:546)
        at app//org.apache.spark.sql.Dataset.$anonfun$withAction$1(Dataset.scala:4321)
        at app//org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$6(SQLExecution.scala:125)
        at app//org.apache.spark.sql.execution.SQLExecution$.withSQLConfPropagated(SQLExecution.scala:201)
        at app//org.apache.spark.sql.execution.SQLExecution$.$anonfun$withNewExecutionId$1(SQLExecution.scala:108)
        at app//org.apache.spark.sql.SparkSession.withActive(SparkSession.scala:900)
        at app//org.apache.spark.sql.execution.SQLExecution$.withNewExecutionId(SQLExecution.scala:66)
        at app//org.apache.spark.sql.Dataset.withAction(Dataset.scala:4321)
        at app//org.apache.spark.sql.Dataset.collectAsList(Dataset.scala:3586)
        at app//org.apache.iceberg.spark.TestBase.sql(TestBase.java:128)
        at app//org.apache.iceberg.spark.TestBase.lambda$executeAndKeepPlan$3(TestBase.java:256)
        at app//org.apache.iceberg.spark.TestBase.executeAndKeepPlan(TestBase.java:275)
        at app//org.apache.iceberg.spark.TestBase.executeAndKeepPlan(TestBase.java:256)
        at app//org.apache.iceberg.spark.sql.TestStoragePartitionedJoins.lambda$assertPartitioningAwarePlan$0(TestStoragePartitionedJoins.java:640)
        at app//org.apache.iceberg.spark.TestBase.withSQLConf(TestBase.java:214)
        at app//org.apache.iceberg.spark.sql.TestStoragePartitionedJoins.assertPartitioningAwarePlan(TestStoragePartitionedJoins.java:637)
        at app//org.apache.iceberg.spark.sql.TestStoragePartitionedJoins.checkJoin(TestStoragePartitionedJoins.java:612)
        at app//org.apache.iceberg.spark.sql.TestStoragePartitionedJoins.testJoinsWithBucketingOnBinaryColumn(TestStoragePartitionedJoins.java:173)

    Caused by: org.apache.comet.CometNativeException: overflow
        at comet::errors::init::{{closure}}(__internal__:0)
        at std::panicking::rust_panic_with_hook(__internal__:0)
        at std::panicking::begin_panic_handler::{{closure}}(__internal__:0)
        at std::sys::backtrace::__rust_end_short_backtrace(__internal__:0)
        at __rustc::rust_begin_unwind(__internal__:0)
        at core::panicking::panic_fmt(__internal__:0)
        at core::option::expect_failed(__internal__:0)
        at arrow_select::take::take_bytes(__internal__:0)
        at arrow_select::take::take_impl(__internal__:0)
        at arrow_select::take::take(__internal__:0)
        at arrow_select::take::take_arrays(__internal__:0)
        at datafusion_physical_plan::sorts::sort::sort_batch(__internal__:0)
        at <datafusion_physical_plan::stream::RecordBatchStreamAdapter<S> as futures_core::stream::Stream>::poll_next(__internal__:0)
        at <datafusion_physical_plan::stream::RecordBatchStreamAdapter<S> as futures_core::stream::Stream>::poll_next(__internal__:0)
        at comet::execution::jni_api::Java_org_apache_comet_Native_executePlan::{{closure}}::{{closure}}(__internal__:0)
        at Java_org_apache_comet_Native_executePlan(__internal__:0)
        at <unknown>(__internal__:0)
        at app//org.apache.comet.Native.executePlan(Native Method)
        at app//org.apache.comet.CometExecIterator.$anonfun$getNextBatch$2(CometExecIterator.scala:155)
        at app//org.apache.comet.CometExecIterator.$anonfun$getNextBatch$2$adapted(CometExecIterator.scala:154)
        at app//org.apache.comet.vector.NativeUtil.getNextBatch(NativeUtil.scala:157)
        at app//org.apache.comet.CometExecIterator.$anonfun$getNextBatch$1(CometExecIterator.scala:154)
        at app//org.apache.comet.Tracing$.withTrace(Tracing.scala:31)
        at app//org.apache.comet.CometExecIterator.getNextBatch(CometExecIterator.scala:152)
        at app//org.apache.comet.CometExecIterator.hasNext(CometExecIterator.scala:203)
        at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.cometcolumnartorow_nextBatch_0$(Unknown Source)
        at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage2.processNext(Unknown Source)
        at app//org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
        at app//org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:43)
        at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.smj_findNextJoinRows_0$(Unknown Source)
        at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage3.processNext(Unknown Source)
        at app//org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43)
        at app//org.apache.spark.sql.execution.WholeStageCodegenEvaluatorFactory$WholeStageCodegenPartitionEvaluator$$anon$1.hasNext(WholeStageCodegenEvaluatorFactory.scala:43)
        at app//scala.collection.Iterator$$anon$9.hasNext(Iterator.scala:583)
        at app//scala.collection.Iterator$$anon$9.hasNext(Iterator.scala:583)
        at app//org.apache.spark.util.random.SamplingUtils$.reservoirSampleAndCount(SamplingUtils.scala:41)
        at app//org.apache.spark.RangePartitioner$.$anonfun$sketch$1(Partitioner.scala:322)
        at app//org.apache.spark.RangePartitioner$.$anonfun$sketch$1$adapted(Partitioner.scala:320)
        at app//org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2(RDD.scala:910)
        at app//org.apache.spark.rdd.RDD.$anonfun$mapPartitionsWithIndex$2$adapted(RDD.scala:910)
        at app//org.apache.spark.rdd.MapPartitionsRDD.compute(MapPartitionsRDD.scala:52)
        at app//org.apache.spark.rdd.RDD.computeOrReadCheckpoint(RDD.scala:367)
        at app//org.apache.spark.rdd.RDD.iterator(RDD.scala:331)
        at app//org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:93)
        at app//org.apache.spark.TaskContext.runTaskWithListeners(TaskContext.scala:166)
        at app//org.apache.spark.scheduler.Task.run(Task.scala:141)
        at app//org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$4(Executor.scala:621)
        at app//org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally(SparkErrorUtils.scala:64)
        at app//org.apache.spark.util.SparkErrorUtils.tryWithSafeFinally$(SparkErrorUtils.scala:61)
        at app//org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:94)
        at app//org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:624)
        at java.base@11.0.28/java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1128)
        at java.base@11.0.28/java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:628)
        at java.base@11.0.28/java.lang.Thread.run(Thread.java:829)
```

### Steps to reproduce

```scala
.config("spark.plugins", "org.apache.spark.CometPlugin")
.config("spark.shuffle.manager", "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
.config("spark.comet.explainFallback.enabled", "true")
.config("spark.sql.iceberg.parquet.reader-type", "COMET")
.config("spark.memory.offHeap.enabled", "true")
.config("spark.memory.offHeap.size", "10g")
.config("spark.comet.use.lazyMaterialization", "false")
.config("spark.comet.schemaEvolution.enabled", "true")
```

### Expected behavior

_No response_

### Additional context

_No response_
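A note on the likely failure mode (inferred from the trace above, not confirmed): the panic message is `overflow`, raised via `core::option::expect_failed` inside `arrow_select::take::take_bytes` while `sort_batch` gathers a binary column. Arrow's `Binary` arrays index their value bytes with 32-bit offsets, so if a single gathered output array needs more than `Int.MaxValue` (~2.1 GB) bytes of values, a checked offset addition fails with exactly this message. The sketch below only illustrates that arithmetic; the value size and row count are invented for illustration:

```scala
// Illustration only (not Comet/Arrow code): Arrow Binary arrays use 32-bit
// value offsets, so everything gathered into one output array must fit in an
// Int. Math.addExact mirrors the checked addition that appears to panic in
// take_bytes.
object OffsetOverflowSketch extends App {
  val bytesPerValue = 1 << 20 // assumed: 1 MiB per binary value
  val rowsGathered  = 3000    // assumed: rows taken into one output array

  try {
    val totalBytes = (1 to rowsGathered).foldLeft(0) { (acc, _) =>
      Math.addExact(acc, bytesPerValue) // throws once the sum passes Int.MaxValue
    }
    println(s"total bytes: $totalBytes")
  } catch {
    case _: ArithmeticException =>
      println("overflow") // the same condition the native kernel panics on
  }
}
```

If that reading is correct, 64-bit offsets (`LargeBinary`) or smaller batches reaching the sort would sidestep the limit.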
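For completeness, a minimal session builder wiring up the reproduction settings above (a sketch: the master, app name, and value name are placeholders, and the Comet and Iceberg runtime jars are assumed to be on the classpath):

```scala
import org.apache.spark.sql.SparkSession

// Sketch: master and appName are placeholders, not part of the original report.
val spark = SparkSession.builder()
  .master("local[*]")
  .appName("comet-iceberg-spj-repro")
  .config("spark.plugins", "org.apache.spark.CometPlugin")
  .config("spark.shuffle.manager", "org.apache.spark.sql.comet.execution.shuffle.CometShuffleManager")
  .config("spark.comet.explainFallback.enabled", "true")
  .config("spark.sql.iceberg.parquet.reader-type", "COMET")
  .config("spark.memory.offHeap.enabled", "true")
  .config("spark.memory.offHeap.size", "10g")
  .config("spark.comet.use.lazyMaterialization", "false")
  .config("spark.comet.schemaEvolution.enabled", "true")
  .getOrCreate()

// The failure then reproduces by running the Iceberg TestStoragePartitionedJoins
// suite (testJoinsWithBucketingOnBinaryColumn) against a session configured this way.
```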