[ https://issues.apache.org/jira/browse/SPARK-39837?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Yang Jie updated SPARK-39837: ----------------------------- Description: Following log in `TPC-DS queries with SF=1` GA logs: {code:java} 2022-07-22T00:19:52.8539664Z 00:19:52.849 WARN org.apache.spark.DebugFilesystem: Leaked filesystem connection created at: 2022-07-22T00:19:52.8548926Z java.lang.Throwable 2022-07-22T00:19:52.8568135Z at org.apache.spark.DebugFilesystem$.addOpenStream(DebugFilesystem.scala:35) 2022-07-22T00:19:52.8573547Z at org.apache.spark.DebugFilesystem.open(DebugFilesystem.scala:75) 2022-07-22T00:19:52.8574108Z at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:976) 2022-07-22T00:19:52.8578427Z at org.apache.parquet.hadoop.util.HadoopInputFile.newStream(HadoopInputFile.java:69) 2022-07-22T00:19:52.8579211Z at org.apache.parquet.hadoop.ParquetFileReader.<init>(ParquetFileReader.java:774) 2022-07-22T00:19:52.8589698Z at org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase.initialize(SpecificParquetRecordReaderBase.java:100) 2022-07-22T00:19:52.8590842Z at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.initialize(VectorizedParquetRecordReader.java:175) 2022-07-22T00:19:52.8594751Z at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat.$anonfun$buildReaderWithPartitionValues$1(ParquetFileFormat.scala:340) 2022-07-22T00:19:52.8595634Z at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:211) 2022-07-22T00:19:52.8598975Z at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:272) 2022-07-22T00:19:52.8599639Z at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:118) 2022-07-22T00:19:52.8602839Z at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:583) 2022-07-22T00:19:52.8603625Z at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage5.columnartorow_nextBatch_0$(Unknown Source) 2022-07-22T00:19:52.8606618Z at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage5.processNext(Unknown Source) 2022-07-22T00:19:52.8609954Z at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) 2022-07-22T00:19:52.8620028Z at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760) 2022-07-22T00:19:52.8623148Z at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460) 2022-07-22T00:19:52.8623812Z at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140) 2022-07-22T00:19:52.8627344Z at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) 2022-07-22T00:19:52.8628031Z at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:101) 2022-07-22T00:19:52.8637881Z at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) 2022-07-22T00:19:52.8638603Z at org.apache.spark.scheduler.Task.run(Task.scala:139) 2022-07-22T00:19:52.8644696Z at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548) 2022-07-22T00:19:52.8645352Z at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1490) 2022-07-22T00:19:52.8649598Z at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551) 2022-07-22T00:19:52.8650238Z at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 2022-07-22T00:19:52.8657783Z at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 2022-07-22T00:19:52.8658260Z at java.lang.Thread.run(Thread.java:750){code} Actions have similar to log: * [https://github.com/apache/spark/runs/7460003953?check_suite_focus=true] * [https://github.com/apache/spark/runs/7459868605?check_suite_focus=true] * 
[https://github.com/apache/spark/runs/7460262731?check_suite_focus=true] was: Following log in `TPC-DS queries with SF=1` GA logs: {code:java} 2022-07-22T00:48:19.8046575Z 00:48:19.800 WARN org.apache.spark.DebugFilesystem: Leaked filesystem connection created at: 2022-07-22T00:48:19.8183197Z java.lang.Throwable 2022-07-22T00:48:19.8209541Z at org.apache.spark.DebugFilesystem$.addOpenStream(DebugFilesystem.scala:35) 2022-07-22T00:48:19.8364870Z at org.apache.spark.DebugFilesystem.open(DebugFilesystem.scala:75) 2022-07-22T00:48:19.8429477Z at org.apache.hadoop.fs.FileSystem.open(FileSystem.java:976) 2022-07-22T00:48:19.8440381Z at org.apache.parquet.hadoop.util.HadoopInputFile.newStream(HadoopInputFile.java:69) 2022-07-22T00:48:19.8463114Z at org.apache.parquet.hadoop.ParquetFileReader.<init>(ParquetFileReader.java:774) 2022-07-22T00:48:19.8483110Z at org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase.initialize(SpecificParquetRecordReaderBase.java:100) 2022-07-22T00:48:19.8492740Z at org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.initialize(VectorizedParquetRecordReader.java:175) 2022-07-22T00:48:19.8507149Z at org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat.$anonfun$buildReaderWithPartitionValues$1(ParquetFileFormat.scala:340) 2022-07-22T00:48:19.8525518Z at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:211) 2022-07-22T00:48:19.8536791Z at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:272) 2022-07-22T00:48:19.8542997Z at org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:118) 2022-07-22T00:48:19.8548773Z at org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:583) 2022-07-22T00:48:19.8552000Z at 
org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage5.columnartorow_nextBatch_0$(Unknown Source) 2022-07-22T00:48:19.8561197Z at org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage5.processNext(Unknown Source) 2022-07-22T00:48:19.8564920Z at org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) 2022-07-22T00:48:19.8570921Z at org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760) 2022-07-22T00:48:19.8578211Z at scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460) 2022-07-22T00:48:19.8581739Z at org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140) 2022-07-22T00:48:19.8588053Z at org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) 2022-07-22T00:48:19.8591953Z at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:101) 2022-07-22T00:48:19.8599896Z at org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) 2022-07-22T00:48:19.8605778Z at org.apache.spark.scheduler.Task.run(Task.scala:139) 2022-07-22T00:48:19.8609467Z at org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548) 2022-07-22T00:48:19.8610083Z at org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1490) 2022-07-22T00:48:19.8614645Z at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551) 2022-07-22T00:48:19.8616327Z at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) 2022-07-22T00:48:19.8620080Z at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) 2022-07-22T00:48:19.8620695Z at java.lang.Thread.run(Thread.java:750) {code} Actions have similar to log: * [https://github.com/apache/spark/runs/7460003953?check_suite_focus=true] * [https://github.com/apache/spark/runs/7459868605?check_suite_focus=true] * 
https://github.com/apache/spark/runs/7460262731?check_suite_focus=true > Filesystem leak when running `TPC-DS queries with SF=1` > ------------------------------------------------------- > > Key: SPARK-39837 > URL: https://issues.apache.org/jira/browse/SPARK-39837 > Project: Spark > Issue Type: Bug > Components: Tests > Affects Versions: 3.4.0 > Reporter: Yang Jie > Priority: Major > > Following log in `TPC-DS queries with SF=1` GA logs: > > {code:java} > 2022-07-22T00:19:52.8539664Z 00:19:52.849 WARN > org.apache.spark.DebugFilesystem: Leaked filesystem connection created at: > 2022-07-22T00:19:52.8548926Z java.lang.Throwable > 2022-07-22T00:19:52.8568135Z at > org.apache.spark.DebugFilesystem$.addOpenStream(DebugFilesystem.scala:35) > 2022-07-22T00:19:52.8573547Z at > org.apache.spark.DebugFilesystem.open(DebugFilesystem.scala:75) > 2022-07-22T00:19:52.8574108Z at > org.apache.hadoop.fs.FileSystem.open(FileSystem.java:976) > 2022-07-22T00:19:52.8578427Z at > org.apache.parquet.hadoop.util.HadoopInputFile.newStream(HadoopInputFile.java:69) > 2022-07-22T00:19:52.8579211Z at > org.apache.parquet.hadoop.ParquetFileReader.<init>(ParquetFileReader.java:774) > 2022-07-22T00:19:52.8589698Z at > org.apache.spark.sql.execution.datasources.parquet.SpecificParquetRecordReaderBase.initialize(SpecificParquetRecordReaderBase.java:100) > 2022-07-22T00:19:52.8590842Z at > org.apache.spark.sql.execution.datasources.parquet.VectorizedParquetRecordReader.initialize(VectorizedParquetRecordReader.java:175) > 2022-07-22T00:19:52.8594751Z at > org.apache.spark.sql.execution.datasources.parquet.ParquetFileFormat.$anonfun$buildReaderWithPartitionValues$1(ParquetFileFormat.scala:340) > 2022-07-22T00:19:52.8595634Z at > org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.org$apache$spark$sql$execution$datasources$FileScanRDD$$anon$$readCurrentFile(FileScanRDD.scala:211) > 2022-07-22T00:19:52.8598975Z at > 
org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.nextIterator(FileScanRDD.scala:272) > 2022-07-22T00:19:52.8599639Z at > org.apache.spark.sql.execution.datasources.FileScanRDD$$anon$1.hasNext(FileScanRDD.scala:118) > 2022-07-22T00:19:52.8602839Z at > org.apache.spark.sql.execution.FileSourceScanExec$$anon$1.hasNext(DataSourceScanExec.scala:583) > 2022-07-22T00:19:52.8603625Z at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage5.columnartorow_nextBatch_0$(Unknown > Source) > 2022-07-22T00:19:52.8606618Z at > org.apache.spark.sql.catalyst.expressions.GeneratedClass$GeneratedIteratorForCodegenStage5.processNext(Unknown > Source) > 2022-07-22T00:19:52.8609954Z at > org.apache.spark.sql.execution.BufferedRowIterator.hasNext(BufferedRowIterator.java:43) > 2022-07-22T00:19:52.8620028Z at > org.apache.spark.sql.execution.WholeStageCodegenExec$$anon$1.hasNext(WholeStageCodegenExec.scala:760) > 2022-07-22T00:19:52.8623148Z at > scala.collection.Iterator$$anon$10.hasNext(Iterator.scala:460) > 2022-07-22T00:19:52.8623812Z at > org.apache.spark.shuffle.sort.BypassMergeSortShuffleWriter.write(BypassMergeSortShuffleWriter.java:140) > 2022-07-22T00:19:52.8627344Z at > org.apache.spark.shuffle.ShuffleWriteProcessor.write(ShuffleWriteProcessor.scala:59) > 2022-07-22T00:19:52.8628031Z at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:101) > 2022-07-22T00:19:52.8637881Z at > org.apache.spark.scheduler.ShuffleMapTask.runTask(ShuffleMapTask.scala:53) > 2022-07-22T00:19:52.8638603Z at > org.apache.spark.scheduler.Task.run(Task.scala:139) > 2022-07-22T00:19:52.8644696Z at > org.apache.spark.executor.Executor$TaskRunner.$anonfun$run$3(Executor.scala:548) > 2022-07-22T00:19:52.8645352Z at > org.apache.spark.util.Utils$.tryWithSafeFinally(Utils.scala:1490) > 2022-07-22T00:19:52.8649598Z at > org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:551) > 2022-07-22T00:19:52.8650238Z at > 
java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) > 2022-07-22T00:19:52.8657783Z at > java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) > 2022-07-22T00:19:52.8658260Z at java.lang.Thread.run(Thread.java:750){code} > > > GitHub Actions runs with similar logs: > * [https://github.com/apache/spark/runs/7460003953?check_suite_focus=true] > * [https://github.com/apache/spark/runs/7459868605?check_suite_focus=true] > * [https://github.com/apache/spark/runs/7460262731?check_suite_focus=true] > > -- This message was sent by Atlassian Jira (v8.20.10#820010) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org